gcm_nohw.rs source code [crates/ring-0.17.8/src/aead/gcm/gcm_nohw.rs]

1	// Copyright (c) 2019, Google Inc.
2	// Portions Copyright 2020 Brian Smith.
3	//
4	// Permission to use, copy, modify, and/or distribute this software for any
5	// purpose with or without fee is hereby granted, provided that the above
6	// copyright notice and this permission notice appear in all copies.
7	//
8	// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9	// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10	// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
11	// SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12	// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
13	// OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
14	// CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15
16	// This file is based on BoringSSL's gcm_nohw.c.
17
18	// This file contains a constant-time implementation of GHASH based on the notes
19	// in https://bearssl.org/constanttime.html#ghash-for-gcm and the reduction
20	// algorithm described in
21	// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
22	//
23	// Unlike the BearSSL notes, we use u128 in the 64-bit implementation.
24
25	use super::{Block, Xi, BLOCK_LEN};
26	use crate::polyfill::ArraySplitMap;
27
/// Carry-less (polynomial over GF(2)) multiplication of two 64-bit values.
///
/// Returns the 128-bit product as `(low 64 bits, high 64 bits)`.
///
/// The product is assembled from ordinary integer multiplications of masked
/// operands, shifts and XORs only; there are no branches or table lookups that
/// depend on `a` or `b`.
#[cfg(target_pointer_width = "64")]
fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
    // Truncation to the low 64 bits is exactly what this helper is for.
    #[allow(clippy::cast_possible_truncation)]
    #[inline(always)]
    fn lo(a: u128) -> u64 {
        a as u64
    }

    #[inline(always)]
    fn hi(a: u128) -> u64 {
        lo(a >> 64)
    }

    #[inline(always)]
    fn mul(a: u64, b: u64) -> u128 {
        u128::from(a) * u128::from(b)
    }

    // One term every four bits means the largest term is 64/4 = 16, which barely
    // overflows into the next term. Using one term every five bits would cost 25
    // multiplications instead of 16. It is faster to mask off the bottom four
    // bits of |a|, giving a largest term of 60/4 = 15, and apply the bottom bits
    // separately.
    let a0 = a & 0x1111111111111110;
    let a1 = a & 0x2222222222222220;
    let a2 = a & 0x4444444444444440;
    let a3 = a & 0x8888888888888880;

    let b0 = b & 0x1111111111111111;
    let b1 = b & 0x2222222222222222;
    let b2 = b & 0x4444444444444444;
    let b3 = b & 0x8888888888888888;

    // |cN| collects every partial product whose bit positions are congruent to
    // N modulo 4; the other positions hold carry garbage and are masked below.
    let c0 = mul(a0, b0) ^ mul(a1, b3) ^ mul(a2, b2) ^ mul(a3, b1);
    let c1 = mul(a0, b1) ^ mul(a1, b0) ^ mul(a2, b3) ^ mul(a3, b2);
    let c2 = mul(a0, b2) ^ mul(a1, b1) ^ mul(a2, b0) ^ mul(a3, b3);
    let c3 = mul(a0, b3) ^ mul(a1, b2) ^ mul(a2, b1) ^ mul(a3, b0);

    // Multiply the bottom four bits of |a| with |b|. Each mask is all-ones when
    // the corresponding bit of |a| is set and zero otherwise.
    let a0_mask = 0u64.wrapping_sub(a & 1);
    let a1_mask = 0u64.wrapping_sub((a >> 1) & 1);
    let a2_mask = 0u64.wrapping_sub((a >> 2) & 1);
    let a3_mask = 0u64.wrapping_sub((a >> 3) & 1);
    let extra = u128::from(a0_mask & b)
        ^ (u128::from(a1_mask & b) << 1)
        ^ (u128::from(a2_mask & b) << 2)
        ^ (u128::from(a3_mask & b) << 3);

    let lo = (lo(c0) & 0x1111111111111111)
        ^ (lo(c1) & 0x2222222222222222)
        ^ (lo(c2) & 0x4444444444444444)
        ^ (lo(c3) & 0x8888888888888888)
        ^ lo(extra);
    let hi = (hi(c0) & 0x1111111111111111)
        ^ (hi(c1) & 0x2222222222222222)
        ^ (hi(c2) & 0x4444444444444444)
        ^ (hi(c3) & 0x8888888888888888)
        ^ hi(extra);
    (lo, hi)
}
88
/// Carry-less (polynomial over GF(2)) multiplication of two 32-bit values,
/// returning the 64-bit product.
///
/// Built from ordinary integer multiplications of masked operands; there are
/// no branches or table lookups that depend on `a` or `b`.
#[cfg(not(target_pointer_width = "64"))]
fn gcm_mul32_nohw(a: u32, b: u32) -> u64 {
    #[inline(always)]
    fn mul(a: u32, b: u32) -> u64 {
        u64::from(a) * u64::from(b)
    }

    // One term every four bits means the largest term is 32/4 = 8, which does not
    // overflow into the next term.
    let a0 = a & 0x11111111;
    let a1 = a & 0x22222222;
    let a2 = a & 0x44444444;
    let a3 = a & 0x88888888;

    let b0 = b & 0x11111111;
    let b1 = b & 0x22222222;
    let b2 = b & 0x44444444;
    let b3 = b & 0x88888888;

    // |cN| collects every partial product whose bit positions are congruent to
    // N modulo 4; the remaining positions are masked off below.
    let c0 = mul(a0, b0) ^ mul(a1, b3) ^ mul(a2, b2) ^ mul(a3, b1);
    let c1 = mul(a0, b1) ^ mul(a1, b0) ^ mul(a2, b3) ^ mul(a3, b2);
    let c2 = mul(a0, b2) ^ mul(a1, b1) ^ mul(a2, b0) ^ mul(a3, b3);
    let c3 = mul(a0, b3) ^ mul(a1, b2) ^ mul(a2, b1) ^ mul(a3, b0);

    (c0 & 0x1111111111111111)
        | (c1 & 0x2222222222222222)
        | (c2 & 0x4444444444444444)
        | (c3 & 0x8888888888888888)
}
118
/// Carry-less (polynomial over GF(2)) multiplication of two 64-bit values for
/// targets whose pointer width is not 64 bits.
///
/// Returns the 128-bit product as `(low 64 bits, high 64 bits)`, built from
/// three 32x32 carry-less multiplications combined Karatsuba-style.
#[cfg(not(target_pointer_width = "64"))]
fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
    // Truncation to the low 32 bits is exactly what this helper is for.
    #[inline(always)]
    fn lo(a: u64) -> u32 {
        a as u32
    }
    #[inline(always)]
    fn hi(a: u64) -> u32 {
        lo(a >> 32)
    }

    let a0 = lo(a);
    let a1 = hi(a);
    let b0 = lo(b);
    let b1 = hi(b);
    // Karatsuba multiplication.
    let lo = gcm_mul32_nohw(a0, b0);
    let hi = gcm_mul32_nohw(a1, b1);
    // The middle term straddles the two output words: its low half lands in
    // the top of |lo| and its high half in the bottom of |hi|.
    let mid = gcm_mul32_nohw(a0 ^ a1, b0 ^ b1) ^ lo ^ hi;
    (lo ^ (mid << 32), hi ^ (mid >> 32))
}
140
141	pub(super) fn init(xi: [u64; `2`]) -> super::u128 {
142	// We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
143	// avoids a shift by 1 in the multiplication, needed to account for bit
144	// reversal losing a bit after multiplication, that is,
145	// rev128(X) rev128(Y) = rev255(XY).
146	//
147	// Per Appendix A, we run mulX_POLYVAL. Note this is the same transformation
148	// applied by \|gcm_init_clmul\|, etc. Note \|Xi\| has already been byteswapped.
149	//
150	// See also slide 16 of
151	// https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf
152	let mut lo = xi[`1`];
153	let mut hi = xi[`0`];
154
155	let mut carry = hi >> `63`;
156	carry = `0u64`.wrapping_sub(carry);
157
158	hi <<= `1`;
159	hi \|= lo >> `63`;
160	lo <<= `1`;
161
162	// The irreducible polynomial is 1 + x^121 + x^126 + x^127 + x^128, so we
163	// conditionally add 0xc200...0001.
164	lo ^= carry & `1`;
165	hi ^= carry & `0xc200000000000000`;
166
167	// This implementation does not use the rest of \|Htable\|.
168	super::u128 { hi, lo }
169	}
170
171	fn gcm_polyval_nohw(xi: &mut [u64; `2`], h: super::u128) {
172	// Karatsuba multiplication. The product of \|Xi\| and \|H\| is stored in \|r0\|
173	// through \|r3\|. Note there is no byte or bit reversal because we are
174	// evaluating POLYVAL.
175	let (r0, mut r1) = gcm_mul64_nohw(xi[`0`], h.lo);
176	let (mut r2, mut r3) = gcm_mul64_nohw(xi[`1`], h.hi);
177	let (mut mid0, mut mid1) = gcm_mul64_nohw(xi[`0`] ^ xi[`1`], h.hi ^ h.lo);
178	mid0 ^= r0 ^ r2;
179	mid1 ^= r1 ^ r3;
180	r2 ^= mid1;
181	r1 ^= mid0;
182
183	// Now we multiply our 256-bit result by x^-128 and reduce. \|r2\| and
184	// \|r3\| shifts into position and we must multiply \|r0\| and \|r1\| by x^-128. We
185	// have:
186	//
187	// 1 = x^121 + x^126 + x^127 + x^128
188	// x^-128 = x^-7 + x^-2 + x^-1 + 1
189	//
190	// This is the GHASH reduction step, but with bits flowing in reverse.
191
192	// The x^-7, x^-2, and x^-1 terms shift bits past x^0, which would require
193	// another reduction steps. Instead, we gather the excess bits, incorporate
194	// them into \|r0\| and \|r1\| and reduce once. See slides 17-19
195	// of https://crypto.stanford.edu/RealWorldCrypto/slides/gueron.pdf.
196	r1 ^= (r0 << `63`) ^ (r0 << `62`) ^ (r0 << `57`);
197
198	// 1
199	r2 ^= r0;
200	r3 ^= r1;
201
202	// x^-1
203	r2 ^= r0 >> `1`;
204	r2 ^= r1 << `63`;
205	r3 ^= r1 >> `1`;
206
207	// x^-2
208	r2 ^= r0 >> `2`;
209	r2 ^= r1 << `62`;
210	r3 ^= r1 >> `2`;
211
212	// x^-7
213	r2 ^= r0 >> `7`;
214	r2 ^= r1 << `57`;
215	r3 ^= r1 >> `7`;
216
217	*xi = [r2, r3];
218	}
219
220	pub(super) fn gmult(xi: &mut Xi, h: super::u128) {
221	with_swapped_xi(xi, \|swapped: &mut [u64; 2]\| {
222	gcm_polyval_nohw(xi:swapped, h);
223	})
224	}
225
226	pub(super) fn ghash(xi: &mut Xi, h: super::u128, input: &[[u8; BLOCK_LEN]]) {
227	with_swapped_xi(xi, \|swapped: &mut [u64; 2]\| {
228	input.iter().for_each(\|&input: [u8; 16]\| {
229	let input: [u64; 2] = input.array_split_map(u64::from_be_bytes);
230	swapped[`0`] ^= input[`1`];
231	swapped[`1`] ^= input[`0`];
232	gcm_polyval_nohw(xi:swapped, h);
233	});
234	});
235	}
236
237	#[inline]
238	fn with_swapped_xi(Xi(xi: &mut Block): &mut Xi, f: impl FnOnce(&mut [u64; `2`])) {
239	let unswapped: [u64; `2`] = xi.as_ref().array_split_map(u64::from_be_bytes);
240	let mut swapped: [u64; `2`] = [unswapped[`1`], unswapped[`0`]];
241	f(&mut swapped);
242	let reswapped: [u64; 2] = [swapped[`1`], swapped[`0`]];
243	xi = Block::from(reswapped.map(u64*::to_be_bytes))
244	}
245

Provided by KDAB

Definitions