// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

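//! GHASH, the polynomial authenticator used by AES-GCM (NIST SP 800-38D).
//!
//! Each 16-byte block is folded into the accumulator `Xi` by computing
//! `Xi = (Xi ^ block) * H` in GF(2^128), where `H` is the hash subkey. The
//! blocks are the (zero-padded) AAD, then the ciphertext, then a final block
//! encoding the bit lengths of both.
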
use super::{
    aes_gcm,
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::{
    bits::{BitLength, FromUsizeBytes},
    cpu, error,
    polyfill::ArraySplitMap,
};
use core::ops::BitXorAssign;

mod gcm_nohw;

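/// A GHASH key: the hash subkey `H` expanded into whatever precomputed
/// table the selected implementation consumes.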
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
}

impl Key {
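    /// Expands `h_be`, the hash subkey `H` as a big-endian block (for
    /// AES-GCM, the encryption of the all-zero block), into the table
    /// format used by the implementation selected for this CPU.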
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.as_ref().array_split_map(u64::from_be_bytes);

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
        };
        let h_table = &mut key.h_table;

        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

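/// Per-message GHASH state: the accumulator `Xi`, a copy of the key's table,
/// and the AAD/input lengths needed for the final length block.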
pub struct Context {
    inner: ContextInner,
    aad_len: BitLength<u64>,
    in_out_len: BitLength<u64>,
    cpu_features: cpu::Features,
}

impl Context {
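    /// Starts GHASH for a message with the given AAD and input/output
    /// length, absorbing the whole AAD up front one zero-padded block at a
    /// time.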
    pub(crate) fn new(
        key: &Key,
        aad: Aad<&[u8]>,
        in_out_len: usize,
        cpu_features: cpu::Features,
    ) -> Result<Self, error::Unspecified> {
        if in_out_len > aes_gcm::MAX_IN_OUT_LEN {
            return Err(error::Unspecified);
        }

        // NIST SP800-38D Section 5.2.1.1 says that the maximum AAD length is
        // 2**64 - 1 bits, i.e. BitLength<u64>::MAX, so we don't need to do an
        // explicit check here.

        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            aad_len: BitLength::from_usize_bytes(aad.as_ref().len())?,
            in_out_len: BitLength::from_usize_bytes(in_out_len)?,
            cpu_features,
        };

        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        Ok(ctx)
    }

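    /// The input/output length, in bits, rounded down to a whole number of
    /// 128-bit blocks.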
    #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))]
    pub(super) fn in_out_whole_block_bits(&self) -> BitLength<usize> {
        use crate::polyfill::usize_from_u64;
        const WHOLE_BLOCK_BITS_MASK: usize = !0b111_1111;
        const _WHOLE_BLOCK_BITS_MASK_CORRECT: () =
            assert!(WHOLE_BLOCK_BITS_MASK == !((BLOCK_LEN * 8) - 1));
        BitLength::from_usize_bits(
            usize_from_u64(self.in_out_len.as_bits()) & WHOLE_BLOCK_BITS_MASK,
        )
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

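    /// Absorbs one or more whole blocks of input; `input.len()` must be a
    /// non-zero multiple of `BLOCK_LEN`.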
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr().cast::<[u8; BLOCK_LEN]>();
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

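    /// Absorbs a single block: `Xi ^= a`, then `Xi *= H` in GF(2^128).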
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

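    /// Absorbs the final block encoding the AAD and input bit lengths, then
    /// passes the resulting accumulator value to `f` to produce the tag.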
    pub(super) fn pre_finish<F>(mut self, f: F) -> super::Tag
    where
        F: FnOnce(Block, cpu::Features) -> super::Tag,
    {
        self.update_block(Block::from(
            [self.aad_len.as_bits(), self.in_out_len.as_bits()].map(u64::to_be_bytes),
        ));

        f(self.inner.Xi.0, self.cpu_features)
    }

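    /// Whether GHASH will use the AVX+MOVBE code path.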
    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }

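    /// Whether GHASH will use the carry-less multiplication (PMULL) code path.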
    #[cfg(target_arch = "aarch64")]
    pub(super) fn is_clmul(&self) -> bool {
        matches!(
            detect_implementation(self.cpu_features),
            Implementation::CLMUL
        )
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
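/// The precomputed table derived from `H` that the GHASH implementations
/// consume. Its layout is implementation-specific; the portable fallback
/// stores its representation of `H` in the first entry only.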
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

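// The number of `u128` entries in `Htable`, as expected by the assembly
// implementations (see the `GCM128_CONTEXT` note on `ContextInner` below).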
const HTABLE_LEN: usize = 16;

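/// The running GHASH accumulator.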
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 x86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

#[allow(clippy::upper_case_acronyms)]
enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    Fallback,
}

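/// Picks the best GHASH implementation available on the current CPU:
/// carry-less multiplication (PMULL/PCLMULQDQ) when available, then NEON on
/// Arm, then the portable fallback.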
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::PMULL.available(cpu_features) {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    {
        if cpu::intel::FXSR.available(cpu_features) && cpu::intel::PCLMULQDQ.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    Implementation::Fallback
}

#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}