// Copyright 2018 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

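// GHASH, the GF(2^128) universal hash function that AES-GCM (NIST SP 800-38D)
// uses for authentication. The hash subkey `H` is expanded into a precomputed
// table, and the 16-byte accumulator `Xi` is updated one block at a time.
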
use super::{
    aes_gcm,
    block::{Block, BLOCK_LEN},
    Aad,
};
use crate::{
    bits::{BitLength, FromUsizeBytes},
    cpu, error,
    polyfill::ArraySplitMap,
};
use core::ops::BitXorAssign;

mod gcm_nohw;

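/// A GHASH key: the hash subkey `H`, expanded into an implementation-specific
/// precomputed table for the implementation selected at construction time.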
#[derive(Clone)]
pub struct Key {
    h_table: HTable,
}

impl Key {
    pub(super) fn new(h_be: Block, cpu_features: cpu::Features) -> Self {
        let h: [u64; 2] = h_be.as_ref().array_split_map(u64::from_be_bytes);

        let mut key = Self {
            h_table: HTable {
                Htable: [u128 { hi: 0, lo: 0 }; HTABLE_LEN],
            },
        };
        let h_table = &mut key.h_table;

        match detect_implementation(cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(cpu_features) => {
                prefixed_extern! {
                    fn gcm_init_avx(HTable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_avx(h_table, &h);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_init_clmul(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_clmul(h_table, &h);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_init_neon(Htable: &mut HTable, h: &[u64; 2]);
                }
                unsafe {
                    gcm_init_neon(h_table, &h);
                }
            }

            Implementation::Fallback => {
                h_table.Htable[0] = gcm_nohw::init(h);
            }
        }

        key
    }
}

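/// The state of an in-progress GHASH computation: the accumulator `Xi`, a
/// copy of the key's table, and the AAD and input lengths needed for the
/// final length block.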
pub struct Context {
    inner: ContextInner,
    aad_len: BitLength<u64>,
    in_out_len: BitLength<u64>,
    cpu_features: cpu::Features,
}

impl Context {
    pub(crate) fn new(
        key: &Key,
        aad: Aad<&[u8]>,
        in_out_len: usize,
        cpu_features: cpu::Features,
    ) -> Result<Self, error::Unspecified> {
        if in_out_len > aes_gcm::MAX_IN_OUT_LEN {
            return Err(error::Unspecified);
        }

        // NIST SP800-38D Section 5.2.1.1 says that the maximum AAD length is
        // 2**64 - 1 bits, i.e. BitLength<u64>::MAX, so we don't need to do an
        // explicit check here.

        let mut ctx = Self {
            inner: ContextInner {
                Xi: Xi(Block::zero()),
                Htable: key.h_table.clone(),
            },
            aad_len: BitLength::from_usize_bytes(aad.as_ref().len())?,
            in_out_len: BitLength::from_usize_bytes(in_out_len)?,
            cpu_features,
        };

        for ad in aad.0.chunks(BLOCK_LEN) {
            let mut block = Block::zero();
            block.overwrite_part_at(0, ad);
            ctx.update_block(block);
        }

        Ok(ctx)
    }

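    /// The length of the whole-block prefix of the input/output, in bits;
    /// i.e. `in_out_len` rounded down to a multiple of `BLOCK_LEN`, expressed
    /// as a bit length.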
    #[cfg(all(target_arch = "aarch64", target_pointer_width = "64"))]
    pub(super) fn in_out_whole_block_bits(&self) -> BitLength<usize> {
        use crate::polyfill::usize_from_u64;
        const WHOLE_BLOCK_BITS_MASK: usize = !0b111_1111;
        const _WHOLE_BLOCK_BITS_MASK_CORRECT: () =
            assert!(WHOLE_BLOCK_BITS_MASK == !((BLOCK_LEN * 8) - 1));
        BitLength::from_usize_bits(
            usize_from_u64(self.in_out_len.as_bits()) & WHOLE_BLOCK_BITS_MASK,
        )
    }

    /// Access to `inner` for the integrated AES-GCM implementations only.
    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
    #[inline]
    pub(super) fn inner(&mut self) -> (&HTable, &mut Xi) {
        (&self.inner.Htable, &mut self.inner.Xi)
    }

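    /// Absorbs `input` into the GHASH state. `input.len()` must be a nonzero
    /// multiple of `BLOCK_LEN`.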
    pub fn update_blocks(&mut self, input: &[u8]) {
        // The assembly functions take the input length in bytes, not blocks.
        let input_bytes = input.len();

        debug_assert_eq!(input_bytes % BLOCK_LEN, 0);
        debug_assert!(input_bytes > 0);

        let input = input.as_ptr().cast::<[u8; BLOCK_LEN]>();
        // SAFETY:
        // - `[[u8; BLOCK_LEN]]` has the same bit validity as `[u8]`.
        // - `[[u8; BLOCK_LEN]]` has the same alignment requirement as `[u8]`.
        // - `input_bytes / BLOCK_LEN` ensures that the total length in bytes of
        //   the new `[[u8; BLOCK_LEN]]` will not be longer than the original
        //   `[u8]`.
        let input = unsafe { core::slice::from_raw_parts(input, input_bytes / BLOCK_LEN) };

        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(target_arch = "x86_64")]
            Implementation::CLMUL if has_avx_movbe(self.cpu_features) => {
                prefixed_extern! {
                    fn gcm_ghash_avx(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_avx(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_ghash_clmul(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_clmul(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_ghash_neon(
                        xi: &mut Xi,
                        Htable: &HTable,
                        inp: *const [u8; BLOCK_LEN],
                        len: crate::c::size_t,
                    );
                }
                unsafe {
                    gcm_ghash_neon(xi, h_table, input.as_ptr(), input_bytes);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::ghash(xi, h_table.Htable[0], input);
            }
        }
    }

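    /// Absorbs a single block: XORs `a` into `Xi` and then multiplies the
    /// result by `H` using the selected implementation.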
    pub fn update_block(&mut self, a: Block) {
        self.inner.Xi.bitxor_assign(a);

        // Although these functions take `Xi` and `h_table` as separate
        // parameters, one or more of them might assume that they are part of
        // the same `ContextInner` structure.
        let xi = &mut self.inner.Xi;
        let h_table = &self.inner.Htable;

        match detect_implementation(self.cpu_features) {
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "arm",
                target_arch = "x86_64",
                target_arch = "x86"
            ))]
            Implementation::CLMUL => {
                prefixed_extern! {
                    fn gcm_gmult_clmul(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_clmul(xi, h_table);
                }
            }

            #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
            Implementation::NEON => {
                prefixed_extern! {
                    fn gcm_gmult_neon(xi: &mut Xi, Htable: &HTable);
                }
                unsafe {
                    gcm_gmult_neon(xi, h_table);
                }
            }

            Implementation::Fallback => {
                gcm_nohw::gmult(xi, h_table.Htable[0]);
            }
        }
    }

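    /// Finishes GHASH by absorbing the block that encodes the AAD and input
    /// lengths (in bits), then lets `f` compute the tag from the final `Xi`.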
    pub(super) fn pre_finish<F>(mut self, f: F) -> super::Tag
    where
        F: FnOnce(Block, cpu::Features) -> super::Tag,
    {
        self.update_block(Block::from(
            [self.aad_len.as_bits(), self.in_out_len.as_bits()].map(u64::to_be_bytes),
        ));

        f(self.inner.Xi.0, self.cpu_features)
    }

    #[cfg(target_arch = "x86_64")]
    pub(super) fn is_avx(&self) -> bool {
        match detect_implementation(self.cpu_features) {
            Implementation::CLMUL => has_avx_movbe(self.cpu_features),
            _ => false,
        }
    }

    #[cfg(target_arch = "aarch64")]
    pub(super) fn is_clmul(&self) -> bool {
        matches!(
            detect_implementation(self.cpu_features),
            Implementation::CLMUL
        )
    }
}

// The alignment is required by non-Rust code that uses `GCM128_CONTEXT`.
#[derive(Clone)]
#[repr(C, align(16))]
pub(super) struct HTable {
    Htable: [u128; HTABLE_LEN],
}

#[derive(Clone, Copy)]
#[repr(C)]
struct u128 {
    hi: u64,
    lo: u64,
}

const HTABLE_LEN: usize = 16;

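/// The GHASH accumulator `Xi`, as in the GCM specification.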
#[repr(transparent)]
pub struct Xi(Block);

impl BitXorAssign<Block> for Xi {
    #[inline]
    fn bitxor_assign(&mut self, a: Block) {
        self.0 ^= a;
    }
}

impl From<Xi> for Block {
    #[inline]
    fn from(Xi(block): Xi) -> Self {
        block
    }
}

// This corresponds roughly to the `GCM128_CONTEXT` structure in BoringSSL.
// Some assembly language code, in particular the MOVBE+AVX2 x86-64
// implementation, requires this exact layout.
#[repr(C, align(16))]
struct ContextInner {
    Xi: Xi,
    Htable: HTable,
}

#[allow(clippy::upper_case_acronyms)]
enum Implementation {
    #[cfg(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    ))]
    CLMUL,

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    NEON,

    Fallback,
}

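// Chooses the best available GHASH implementation for the current CPU:
// carry-less multiplication (PMULL, or FXSR+PCLMULQDQ) when available, then
// NEON on Arm, otherwise the portable fallback.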
#[inline]
fn detect_implementation(cpu_features: cpu::Features) -> Implementation {
    // `cpu_features` is only used for specific platforms.
    #[cfg(not(any(
        target_arch = "aarch64",
        target_arch = "arm",
        target_arch = "x86_64",
        target_arch = "x86"
    )))]
    let _cpu_features = cpu_features;

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::PMULL.available(cpu_features) {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    {
        if cpu::intel::FXSR.available(cpu_features) && cpu::intel::PCLMULQDQ.available(cpu_features)
        {
            return Implementation::CLMUL;
        }
    }

    #[cfg(any(target_arch = "aarch64", target_arch = "arm"))]
    {
        if cpu::arm::NEON.available(cpu_features) {
            return Implementation::NEON;
        }
    }

    Implementation::Fallback
}

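// Whether the AVX+MOVBE assembly implementations may be used.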
#[cfg(target_arch = "x86_64")]
fn has_avx_movbe(cpu_features: cpu::Features) -> bool {
    cpu::intel::AVX.available(cpu_features) && cpu::intel::MOVBE.available(cpu_features)
}