// fallback.rs source code [crates/ring/src/aead/gcm/fallback.rs]

// Copyright (c) 2019, Google Inc.
// Portions Copyright 2020-2024 Brian Smith.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// This file is based on BoringSSL's gcm_nohw.c.

// This file contains a constant-time implementation of GHASH based on the notes
// in https://bearssl.org/constanttime.html#ghash-for-gcm and the reduction
// algorithm described in
// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
//
// Unlike the BearSSL notes, we use u128 in the 64-bit implementation.

use super::{ffi::U128, KeyValue, UpdateBlock, UpdateBlocks, Xi, BLOCK_LEN};
use crate::polyfill::{slice::AsChunks, ArraySplitMap as _};

28	#[derive(Clone)]
29	pub struct Key {
30	h: U128,
31	}
32
33	impl Key {
34	pub(in super::super) fn new(value: KeyValue) -> Self {
35	Self { h: init(value) }
36	}
37	}
38
39	impl UpdateBlock for Key {
40	fn update_block(&self, xi: &mut Xi, a: [u8; BLOCK_LEN]) {
41	xi.bitxor_assign(a);
42	gmult(xi, self.h);
43	}
44	}
45
46	impl UpdateBlocks for Key {
47	fn update_blocks(&self, xi: &mut Xi, input: AsChunks<u8, BLOCK_LEN>) {
48	ghash(xi, self.h, input);
49	}
50	}
51
/// Carry-less multiplication of `a` and `b`, treated as 64-bit polynomials
/// over GF(2).
///
/// Returns the 128-bit product as `(low 64 bits, high 64 bits)`.
///
/// The product is assembled from ordinary integer multiplications of operands
/// that keep only every fourth bit, so the carries produced by the integer
/// additions land in bit positions that are masked away afterwards. The body
/// contains no branches or table lookups that depend on `a` or `b`.
#[cfg(target_pointer_width = "64")]
fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
    // Low half of a 128-bit value; the truncation is intentional.
    #[allow(clippy::cast_possible_truncation)]
    #[inline(always)]
    fn lo(a: u128) -> u64 {
        a as u64
    }

    // High half of a 128-bit value.
    #[inline(always)]
    fn hi(a: u128) -> u64 {
        lo(a >> 64)
    }

    // Full-width 64x64 -> 128 integer multiplication.
    #[inline(always)]
    fn mul(a: u64, b: u64) -> u128 {
        u128::from(a) * u128::from(b)
    }

    // One term every four bits means the largest term is 64/4 = 16, which barely
    // overflows into the next term. Using one term every five bits would cost 25
    // multiplications instead of 16. It is faster to mask off the bottom four
    // bits of |a|, giving a largest term of 60/4 = 15, and apply the bottom bits
    // separately.
    let a0 = a & 0x1111111111111110;
    let a1 = a & 0x2222222222222220;
    let a2 = a & 0x4444444444444440;
    let a3 = a & 0x8888888888888880;

    let b0 = b & 0x1111111111111111;
    let b1 = b & 0x2222222222222222;
    let b2 = b & 0x4444444444444444;
    let b3 = b & 0x8888888888888888;

    // |cN| collects every partial product whose bit positions are congruent
    // to N modulo 4.
    let c0 = mul(a0, b0) ^ mul(a1, b3) ^ mul(a2, b2) ^ mul(a3, b1);
    let c1 = mul(a0, b1) ^ mul(a1, b0) ^ mul(a2, b3) ^ mul(a3, b2);
    let c2 = mul(a0, b2) ^ mul(a1, b1) ^ mul(a2, b0) ^ mul(a3, b3);
    let c3 = mul(a0, b3) ^ mul(a1, b2) ^ mul(a2, b1) ^ mul(a3, b0);

    // Multiply the bottom four bits of |a| with |b|. Each mask is all-ones
    // when the corresponding bit of |a| is set and zero otherwise.
    let a0_mask = 0u64.wrapping_sub(a & 1);
    let a1_mask = 0u64.wrapping_sub((a >> 1) & 1);
    let a2_mask = 0u64.wrapping_sub((a >> 2) & 1);
    let a3_mask = 0u64.wrapping_sub((a >> 3) & 1);
    let extra = u128::from(a0_mask & b)
        ^ (u128::from(a1_mask & b) << 1)
        ^ (u128::from(a2_mask & b) << 2)
        ^ (u128::from(a3_mask & b) << 3);

    // Keep only the bit positions each |cN| is responsible for; everything
    // else in it is carry noise from the integer multiplications.
    let lo = (lo(c0) & 0x1111111111111111)
        ^ (lo(c1) & 0x2222222222222222)
        ^ (lo(c2) & 0x4444444444444444)
        ^ (lo(c3) & 0x8888888888888888)
        ^ lo(extra);
    let hi = (hi(c0) & 0x1111111111111111)
        ^ (hi(c1) & 0x2222222222222222)
        ^ (hi(c2) & 0x4444444444444444)
        ^ (hi(c3) & 0x8888888888888888)
        ^ hi(extra);
    (lo, hi)
}

/// Carry-less multiplication of `a` and `b`, treated as 32-bit polynomials
/// over GF(2), returning the 64-bit product.
///
/// The product is assembled from ordinary integer multiplications of operands
/// that keep only every fourth bit, so the carries produced by the integer
/// additions land in bit positions that are masked away afterwards. The body
/// contains no branches or table lookups that depend on `a` or `b`.
#[cfg(not(target_pointer_width = "64"))]
fn gcm_mul32_nohw(a: u32, b: u32) -> u64 {
    // Full-width 32x32 -> 64 integer multiplication.
    #[inline(always)]
    fn mul(a: u32, b: u32) -> u64 {
        u64::from(a) * u64::from(b)
    }

    // One term every four bits means the largest term is 32/4 = 8, which does not
    // overflow into the next term.
    let a0 = a & 0x11111111;
    let a1 = a & 0x22222222;
    let a2 = a & 0x44444444;
    let a3 = a & 0x88888888;

    let b0 = b & 0x11111111;
    let b1 = b & 0x22222222;
    let b2 = b & 0x44444444;
    let b3 = b & 0x88888888;

    // |cN| collects every partial product whose bit positions are congruent
    // to N modulo 4.
    let c0 = mul(a0, b0) ^ mul(a1, b3) ^ mul(a2, b2) ^ mul(a3, b1);
    let c1 = mul(a0, b1) ^ mul(a1, b0) ^ mul(a2, b3) ^ mul(a3, b2);
    let c2 = mul(a0, b2) ^ mul(a1, b1) ^ mul(a2, b0) ^ mul(a3, b3);
    let c3 = mul(a0, b3) ^ mul(a1, b2) ^ mul(a2, b1) ^ mul(a3, b0);

    // The four masks select disjoint bit positions, so OR merges them.
    (c0 & 0x1111111111111111)
        | (c1 & 0x2222222222222222)
        | (c2 & 0x4444444444444444)
        | (c3 & 0x8888888888888888)
}

/// Carry-less multiplication of `a` and `b`, treated as 64-bit polynomials
/// over GF(2), for targets whose pointer width is not 64 bits.
///
/// Returns the 128-bit product as `(low 64 bits, high 64 bits)`. The product
/// is built from three calls to `gcm_mul32_nohw` combined Karatsuba-style.
#[cfg(not(target_pointer_width = "64"))]
fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
    // Low half of a 64-bit value; the truncation is intentional.
    #[inline(always)]
    fn lo(a: u64) -> u32 {
        a as u32
    }
    // High half of a 64-bit value.
    #[inline(always)]
    fn hi(a: u64) -> u32 {
        lo(a >> 32)
    }

    let a0 = lo(a);
    let a1 = hi(a);
    let b0 = lo(b);
    let b1 = hi(b);
    // Karatsuba multiplication.
    let lo = gcm_mul32_nohw(a0, b0);
    let hi = gcm_mul32_nohw(a1, b1);
    let mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;
    // |mid| belongs at bit 32 of the 128-bit result, so it straddles both
    // output words.
    (lo ^ (mid << 32), hi ^ (mid >> 32))
}

165	fn init(value: KeyValue) -> U128 {
166	let xi = value.into_inner();
167
168	// We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
169	// avoids a shift by 1 in the multiplication, needed to account for bit
170	// reversal losing a bit after multiplication, that is,
171	// rev128(X) rev128(Y) = rev255(XY).
172	//
173	// Per Appendix A, we run mulX_POLYVAL. Note this is the same transformation
174	// applied by \|gcm_init_clmul\|, etc. Note \|Xi\| has already been byteswapped.
175	//
176	// See also slide 16 of
177	// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf
178	let mut lo = xi[`1`];
179	let mut hi = xi[`0`];
180
181	let mut carry = hi >> `63`;
182	carry = `0u64`.wrapping_sub(carry);
183
184	hi <<= `1`;
185	hi \|= lo >> `63`;
186	lo <<= `1`;
187
188	// The irreducible polynomial is 1 + x^121 + x^126 + x^127 + x^128, so we
189	// conditionally add 0xc200...0001.
190	lo ^= carry & `1`;
191	hi ^= carry & `0xc200000000000000`;
192
193	// This implementation does not use the rest of \|Htable\|.
194	U128 { hi, lo }
195	}
196
197	fn gcm_polyval_nohw(xi: &mut [u64; `2`], h: U128) {
198	// Karatsuba multiplication. The product of \|Xi\| and \|H\| is stored in \|r0\|
199	// through \|r3\|. Note there is no byte or bit reversal because we are
200	// evaluating POLYVAL.
201	let (r0, mut r1) = gcm_mul64_nohw(xi[`0`], h.lo);
202	let (mut r2, mut r3) = gcm_mul64_nohw(xi[`1`], h.hi);
203	let (mut mid0, mut mid1) = gcm_mul64_nohw(xi[`0`] ^ xi[`1`], h.hi ^ h.lo);
204	mid0 ^= r0 ^ r2;
205	mid1 ^= r1 ^ r3;
206	r2 ^= mid1;
207	r1 ^= mid0;
208
209	// Now we multiply our 256-bit result by x^-128 and reduce. \|r2\| and
210	// \|r3\| shifts into position and we must multiply \|r0\| and \|r1\| by x^-128. We
211	// have:
212	//
213	// 1 = x^121 + x^126 + x^127 + x^128
214	// x^-128 = x^-7 + x^-2 + x^-1 + 1
215	//
216	// This is the GHASH reduction step, but with bits flowing in reverse.
217
218	// The x^-7, x^-2, and x^-1 terms shift bits past x^0, which would require
219	// another reduction steps. Instead, we gather the excess bits, incorporate
220	// them into \|r0\| and \|r1\| and reduce once. See slides 17-19
221	// of https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
222	r1 ^= (r0 << `63`) ^ (r0 << `62`) ^ (r0 << `57`);
223
224	// 1
225	r2 ^= r0;
226	r3 ^= r1;
227
228	// x^-1
229	r2 ^= r0 >> `1`;
230	r2 ^= r1 << `63`;
231	r3 ^= r1 >> `1`;
232
233	// x^-2
234	r2 ^= r0 >> `2`;
235	r2 ^= r1 << `62`;
236	r3 ^= r1 >> `2`;
237
238	// x^-7
239	r2 ^= r0 >> `7`;
240	r2 ^= r1 << `57`;
241	r3 ^= r1 >> `7`;
242
243	*xi = [r2, r3];
244	}
245
246	fn gmult(xi: &mut Xi, h: U128) {
247	with_swapped_xi(xi, \|swapped: &mut [u64; 2]\| {
248	gcm_polyval_nohw(xi:swapped, h);
249	})
250	}
251
252	fn ghash(xi: &mut Xi, h: U128, input: AsChunks<u8, BLOCK_LEN>) {
253	with_swapped_xi(xi, \|swapped: &mut [u64; 2]\| {
254	input.into_iter().for_each(\|&input: [u8; 16]\| {
255	let input: [u64; _] = input.array_split_map(u64::from_be_bytes);
256	swapped[`0`] ^= input[`1`];
257	swapped[`1`] ^= input[`0`];
258	gcm_polyval_nohw(xi:swapped, h);
259	});
260	});
261	}
262
263	#[inline]
264	fn with_swapped_xi(Xi(xi: &mut [u8; 16]): &mut Xi, f: impl FnOnce(&mut [u64; `2`])) {
265	let unswapped: [u64; `2`] = xi.array_split_map(u64::from_be_bytes);
266	let mut swapped: [u64; `2`] = [unswapped[`1`], unswapped[`0`]];
267	f(&mut swapped);
268	let (xi_0: &mut [u8], xi_1: &mut [u8]) = xi.split_at_mut(BLOCK_LEN / `2`);
269	xi_0.copy_from_slice(&u64::to_be_bytes(self:swapped[`1`]));
270	xi_1.copy_from_slice(&u64::to_be_bytes(self:swapped[`0`]));
271	}
272