1 | use super::{arch::*, utils::*}; |
2 | use crate::{Block, Block8}; |
3 | use cipher::inout::InOut; |
4 | use core::{mem, ptr}; |
5 | |
6 | /// AES-192 round keys |
7 | pub(super) type RoundKeys = [__m128i; 13]; |
8 | |
9 | #[inline ] |
10 | #[target_feature (enable = "aes" )] |
11 | pub(super) unsafe fn encrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
12 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
13 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
14 | b = _mm_xor_si128(a:b, b:keys[0]); |
15 | b = _mm_aesenc_si128(a:b, round_key:keys[1]); |
16 | b = _mm_aesenc_si128(a:b, round_key:keys[2]); |
17 | b = _mm_aesenc_si128(a:b, round_key:keys[3]); |
18 | b = _mm_aesenc_si128(a:b, round_key:keys[4]); |
19 | b = _mm_aesenc_si128(a:b, round_key:keys[5]); |
20 | b = _mm_aesenc_si128(a:b, round_key:keys[6]); |
21 | b = _mm_aesenc_si128(a:b, round_key:keys[7]); |
22 | b = _mm_aesenc_si128(a:b, round_key:keys[8]); |
23 | b = _mm_aesenc_si128(a:b, round_key:keys[9]); |
24 | b = _mm_aesenc_si128(a:b, round_key:keys[10]); |
25 | b = _mm_aesenc_si128(a:b, round_key:keys[11]); |
26 | b = _mm_aesenclast_si128(a:b, round_key:keys[12]); |
27 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
28 | } |
29 | |
30 | #[inline ] |
31 | #[target_feature (enable = "aes" )] |
32 | pub(super) unsafe fn encrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
33 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
34 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
35 | xor8(&mut b, key:keys[0]); |
36 | aesenc8(&mut b, key:keys[1]); |
37 | aesenc8(&mut b, key:keys[2]); |
38 | aesenc8(&mut b, key:keys[3]); |
39 | aesenc8(&mut b, key:keys[4]); |
40 | aesenc8(&mut b, key:keys[5]); |
41 | aesenc8(&mut b, key:keys[6]); |
42 | aesenc8(&mut b, key:keys[7]); |
43 | aesenc8(&mut b, key:keys[8]); |
44 | aesenc8(&mut b, key:keys[9]); |
45 | aesenc8(&mut b, key:keys[10]); |
46 | aesenc8(&mut b, key:keys[11]); |
47 | aesenclast8(&mut b, key:keys[12]); |
48 | store8(blocks:out_ptr, b); |
49 | } |
50 | |
51 | #[inline ] |
52 | #[target_feature (enable = "aes" )] |
53 | pub(super) unsafe fn decrypt1(keys: &RoundKeys, block: InOut<'_, '_, Block>) { |
54 | let (in_ptr: *const GenericArray>, out_ptr: *mut GenericArray>) = block.into_raw(); |
55 | let mut b: __m128i = _mm_loadu_si128(mem_addr:in_ptr as *const __m128i); |
56 | b = _mm_xor_si128(a:b, b:keys[12]); |
57 | b = _mm_aesdec_si128(a:b, round_key:keys[11]); |
58 | b = _mm_aesdec_si128(a:b, round_key:keys[10]); |
59 | b = _mm_aesdec_si128(a:b, round_key:keys[9]); |
60 | b = _mm_aesdec_si128(a:b, round_key:keys[8]); |
61 | b = _mm_aesdec_si128(a:b, round_key:keys[7]); |
62 | b = _mm_aesdec_si128(a:b, round_key:keys[6]); |
63 | b = _mm_aesdec_si128(a:b, round_key:keys[5]); |
64 | b = _mm_aesdec_si128(a:b, round_key:keys[4]); |
65 | b = _mm_aesdec_si128(a:b, round_key:keys[3]); |
66 | b = _mm_aesdec_si128(a:b, round_key:keys[2]); |
67 | b = _mm_aesdec_si128(a:b, round_key:keys[1]); |
68 | b = _mm_aesdeclast_si128(a:b, round_key:keys[0]); |
69 | _mm_storeu_si128(mem_addr:out_ptr as *mut __m128i, a:b); |
70 | } |
71 | |
72 | #[inline ] |
73 | #[target_feature (enable = "aes" )] |
74 | pub(super) unsafe fn decrypt8(keys: &RoundKeys, blocks: InOut<'_, '_, Block8>) { |
75 | let (in_ptr: *const GenericArray, …>, out_ptr: *mut GenericArray, …>) = blocks.into_raw(); |
76 | let mut b: [__m128i; 8] = load8(blocks:in_ptr); |
77 | xor8(&mut b, key:keys[12]); |
78 | aesdec8(&mut b, key:keys[11]); |
79 | aesdec8(&mut b, key:keys[10]); |
80 | aesdec8(&mut b, key:keys[9]); |
81 | aesdec8(&mut b, key:keys[8]); |
82 | aesdec8(&mut b, key:keys[7]); |
83 | aesdec8(&mut b, key:keys[6]); |
84 | aesdec8(&mut b, key:keys[5]); |
85 | aesdec8(&mut b, key:keys[4]); |
86 | aesdec8(&mut b, key:keys[3]); |
87 | aesdec8(&mut b, key:keys[2]); |
88 | aesdec8(&mut b, key:keys[1]); |
89 | aesdeclast8(&mut b, key:keys[0]); |
90 | store8(blocks:out_ptr, b); |
91 | } |
92 | |
// One step of the AES-192 key schedule: produces the next six 32-bit words.
//
// `$t1` holds four consecutive schedule words and the two low lanes of `$t3`
// hold the following two. `$round` is the round constant; it must be a
// compile-time constant because it is passed as the immediate operand of
// `_mm_aeskeygenassist_si128`.
//
// Evaluates to `(t1, t3)`: `t1` holds the next four words and the two low
// lanes of `t3` hold the two after those. The two high lanes of `t3` are
// by-products and must not be used as key material.
macro_rules! expand_round {
    ($t1:expr, $t3:expr, $round:expr) => {{
        let mut t1 = $t1;
        let mut t3 = $t3;

        // Lane 1 of the assist result is the rotated, substituted lane 1 of
        // `t3` XORed with the round constant; broadcast it to every lane.
        let assist = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(t3, $round), 0x55);

        // Running XOR across lanes: lane i becomes t1[0] ^ ... ^ t1[i].
        let mut shifted = _mm_slli_si128(t1, 0x4);
        t1 = _mm_xor_si128(t1, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        t1 = _mm_xor_si128(t1, shifted);
        shifted = _mm_slli_si128(shifted, 0x4);
        t1 = _mm_xor_si128(t1, shifted);
        t1 = _mm_xor_si128(t1, assist);

        // Chain the last freshly computed word into the two low lanes of `t3`.
        let last_word = _mm_shuffle_epi32(t1, 0xff);
        t3 = _mm_xor_si128(t3, _mm_slli_si128(t3, 0x4));
        t3 = _mm_xor_si128(t3, last_word);

        (t1, t3)
    }};
}
117 | |
// Combines 64-bit halves of two `__m128i` values with `_mm_shuffle_pd`.
//
// Bit 0 of `$imm` selects the low (0) or high (1) half of `$a` for the low
// half of the result; bit 1 does the same with `$b` for the high half.
// The integer <-> double reinterpretation uses the typed cast intrinsics, so
// the operand and result types are checked rather than inferred.
macro_rules! shuffle {
    ($a:expr, $b:expr, $imm:expr) => {
        _mm_castpd_si128(_mm_shuffle_pd(
            _mm_castsi128_pd($a),
            _mm_castsi128_pd($b),
            $imm,
        ))
    };
}
123 | |
124 | #[inline ] |
125 | #[target_feature (enable = "aes" )] |
126 | pub(super) unsafe fn expand_key(key: &[u8; 24]) -> RoundKeys { |
127 | // SAFETY: `RoundKeys` is a `[__m128i; 13]` which can be initialized |
128 | // with all zeroes. |
129 | let mut keys: RoundKeys = mem::zeroed(); |
130 | // we are being extra pedantic here to remove out-of-bound access. |
131 | // this should be optimized out into movups, movsd sequence |
132 | // note that unaligned load MUST be used here, even though we read |
133 | // from the array (compiler missoptimizes aligned load) |
134 | let (k0, k1l) = { |
135 | let mut t = [0u8; 32]; |
136 | ptr::write(t.as_mut_ptr() as *mut [u8; 24], *key); |
137 | |
138 | ( |
139 | _mm_loadu_si128(t.as_ptr() as *const __m128i), |
140 | _mm_loadu_si128(t.as_ptr().offset(16) as *const __m128i), |
141 | ) |
142 | }; |
143 | |
144 | keys[0] = k0; |
145 | |
146 | let (k1_2, k2r) = expand_round!(k0, k1l, 0x01); |
147 | keys[1] = shuffle!(k1l, k1_2, 0); |
148 | keys[2] = shuffle!(k1_2, k2r, 1); |
149 | |
150 | let (k3, k4l) = expand_round!(k1_2, k2r, 0x02); |
151 | keys[3] = k3; |
152 | |
153 | let (k4_5, k5r) = expand_round!(k3, k4l, 0x04); |
154 | let k4 = shuffle!(k4l, k4_5, 0); |
155 | let k5 = shuffle!(k4_5, k5r, 1); |
156 | keys[4] = k4; |
157 | keys[5] = k5; |
158 | |
159 | let (k6, k7l) = expand_round!(k4_5, k5r, 0x08); |
160 | keys[6] = k6; |
161 | |
162 | let (k7_8, k8r) = expand_round!(k6, k7l, 0x10); |
163 | keys[7] = shuffle!(k7l, k7_8, 0); |
164 | keys[8] = shuffle!(k7_8, k8r, 1); |
165 | |
166 | let (k9, k10l) = expand_round!(k7_8, k8r, 0x20); |
167 | keys[9] = k9; |
168 | |
169 | let (k10_11, k11r) = expand_round!(k9, k10l, 0x40); |
170 | keys[10] = shuffle!(k10l, k10_11, 0); |
171 | keys[11] = shuffle!(k10_11, k11r, 1); |
172 | |
173 | let (k12, _) = expand_round!(k10_11, k11r, 0x80); |
174 | keys[12] = k12; |
175 | |
176 | keys |
177 | } |
178 | |
179 | #[inline ] |
180 | #[target_feature (enable = "aes" )] |
181 | pub(super) unsafe fn inv_expanded_keys(keys: &RoundKeys) -> RoundKeys { |
182 | [ |
183 | keys[0], |
184 | _mm_aesimc_si128(keys[1]), |
185 | _mm_aesimc_si128(keys[2]), |
186 | _mm_aesimc_si128(keys[3]), |
187 | _mm_aesimc_si128(keys[4]), |
188 | _mm_aesimc_si128(keys[5]), |
189 | _mm_aesimc_si128(keys[6]), |
190 | _mm_aesimc_si128(keys[7]), |
191 | _mm_aesimc_si128(keys[8]), |
192 | _mm_aesimc_si128(keys[9]), |
193 | _mm_aesimc_si128(keys[10]), |
194 | _mm_aesimc_si128(keys[11]), |
195 | keys[12], |
196 | ] |
197 | } |
198 | |