1 | // Copyright 2016 Brian Smith. |
2 | // Portions Copyright (c) 2016, Google Inc. |
3 | // |
4 | // Permission to use, copy, modify, and/or distribute this software for any |
5 | // purpose with or without fee is hereby granted, provided that the above |
6 | // copyright notice and this permission notice appear in all copies. |
7 | // |
8 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
9 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
11 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
13 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
14 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | |
16 | use super::{quic::Sample, Nonce}; |
17 | |
18 | #[cfg (any( |
19 | test, |
20 | not(any( |
21 | target_arch = "aarch64" , |
22 | target_arch = "arm" , |
23 | target_arch = "x86" , |
24 | target_arch = "x86_64" |
25 | )) |
26 | ))] |
27 | mod fallback; |
28 | |
29 | use crate::polyfill::ArraySplitMap; |
30 | use core::ops::RangeFrom; |
31 | |
32 | #[derive (Clone)] |
33 | pub struct Key { |
34 | words: [u32; KEY_LEN / 4], |
35 | } |
36 | |
37 | impl Key { |
38 | pub(super) fn new(value: [u8; KEY_LEN]) -> Self { |
39 | Self { |
40 | words: value.array_split_map(u32::from_le_bytes), |
41 | } |
42 | } |
43 | } |
44 | |
45 | impl Key { |
46 | #[inline ] |
47 | pub fn encrypt_in_place(&self, counter: Counter, in_out: &mut [u8]) { |
48 | self.encrypt_less_safe(counter, in_out, 0..); |
49 | } |
50 | |
51 | #[inline ] |
52 | pub fn encrypt_iv_xor_in_place(&self, iv: Iv, in_out: &mut [u8; 32]) { |
53 | // It is safe to use `into_counter_for_single_block_less_safe()` |
54 | // because `in_out` is exactly one block long. |
55 | debug_assert!(in_out.len() <= BLOCK_LEN); |
56 | self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), in_out, 0..); |
57 | } |
58 | |
59 | #[inline ] |
60 | pub fn new_mask(&self, sample: Sample) -> [u8; 5] { |
61 | let mut out: [u8; 5] = [0; 5]; |
62 | let iv = Iv::assume_unique_for_key(sample); |
63 | |
64 | debug_assert!(out.len() <= BLOCK_LEN); |
65 | self.encrypt_less_safe(iv.into_counter_for_single_block_less_safe(), &mut out, 0..); |
66 | |
67 | out |
68 | } |
69 | |
70 | /// Analogous to `slice::copy_within()`. |
71 | pub fn encrypt_within(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) { |
72 | // XXX: The x86 and at least one branch of the ARM assembly language |
73 | // code doesn't allow overlapping input and output unless they are |
74 | // exactly overlapping. TODO: Figure out which branch of the ARM code |
75 | // has this limitation and come up with a better solution. |
76 | // |
77 | // https://rt.openssl.org/Ticket/Display.html?id=4362 |
78 | if cfg!(any(target_arch = "arm" , target_arch = "x86" )) && src.start != 0 { |
79 | let len = in_out.len() - src.start; |
80 | in_out.copy_within(src, 0); |
81 | self.encrypt_in_place(counter, &mut in_out[..len]); |
82 | } else { |
83 | self.encrypt_less_safe(counter, in_out, src); |
84 | } |
85 | } |
86 | |
87 | /// This is "less safe" because it skips the important check that `encrypt_within` does. |
88 | /// Only call this with `src` equal to `0..` or from `encrypt_within`. |
89 | #[inline ] |
90 | fn encrypt_less_safe(&self, counter: Counter, in_out: &mut [u8], src: RangeFrom<usize>) { |
91 | #[cfg (any( |
92 | target_arch = "aarch64" , |
93 | target_arch = "arm" , |
94 | target_arch = "x86" , |
95 | target_arch = "x86_64" |
96 | ))] |
97 | #[inline (always)] |
98 | pub(super) fn ChaCha20_ctr32( |
99 | key: &Key, |
100 | counter: Counter, |
101 | in_out: &mut [u8], |
102 | src: RangeFrom<usize>, |
103 | ) { |
104 | let in_out_len = in_out.len().checked_sub(src.start).unwrap(); |
105 | |
106 | // There's no need to worry if `counter` is incremented because it is |
107 | // owned here and we drop immediately after the call. |
108 | prefixed_extern! { |
109 | fn ChaCha20_ctr32( |
110 | out: *mut u8, |
111 | in_: *const u8, |
112 | in_len: crate::c::size_t, |
113 | key: &[u32; KEY_LEN / 4], |
114 | counter: &Counter, |
115 | ); |
116 | } |
117 | unsafe { |
118 | ChaCha20_ctr32( |
119 | in_out.as_mut_ptr(), |
120 | in_out[src].as_ptr(), |
121 | in_out_len, |
122 | key.words_less_safe(), |
123 | &counter, |
124 | ) |
125 | } |
126 | } |
127 | |
128 | #[cfg (not(any( |
129 | target_arch = "aarch64" , |
130 | target_arch = "arm" , |
131 | target_arch = "x86" , |
132 | target_arch = "x86_64" |
133 | )))] |
134 | use fallback::ChaCha20_ctr32; |
135 | |
136 | ChaCha20_ctr32(self, counter, in_out, src); |
137 | } |
138 | |
139 | #[inline ] |
140 | pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] { |
141 | &self.words |
142 | } |
143 | } |
144 | |
145 | /// Counter || Nonce, all native endian. |
146 | #[repr (transparent)] |
147 | pub struct Counter([u32; 4]); |
148 | |
149 | impl Counter { |
150 | pub fn zero(nonce: Nonce) -> Self { |
151 | Self::from_nonce_and_ctr(nonce, 0) |
152 | } |
153 | |
154 | fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self { |
155 | let [n0, n1, n2] = nonce.as_ref().array_split_map(u32::from_le_bytes); |
156 | Self([ctr, n0, n1, n2]) |
157 | } |
158 | |
159 | pub fn increment(&mut self) -> Iv { |
160 | let iv = Iv(self.0); |
161 | self.0[0] += 1; |
162 | iv |
163 | } |
164 | |
165 | /// This is "less safe" because it hands off management of the counter to |
166 | /// the caller. |
167 | #[cfg (any( |
168 | test, |
169 | not(any( |
170 | target_arch = "aarch64" , |
171 | target_arch = "arm" , |
172 | target_arch = "x86" , |
173 | target_arch = "x86_64" |
174 | )) |
175 | ))] |
176 | fn into_words_less_safe(self) -> [u32; 4] { |
177 | self.0 |
178 | } |
179 | } |
180 | |
181 | /// The IV for a single block encryption. |
182 | /// |
183 | /// Intentionally not `Clone` to ensure each is used only once. |
184 | pub struct Iv([u32; 4]); |
185 | |
186 | impl Iv { |
187 | fn assume_unique_for_key(value: [u8; 16]) -> Self { |
188 | Self(value.array_split_map(u32::from_le_bytes)) |
189 | } |
190 | |
191 | fn into_counter_for_single_block_less_safe(self) -> Counter { |
192 | Counter(self.0) |
193 | } |
194 | } |
195 | |
/// Length of a key in bytes; `Key::new` takes exactly this many bytes.
pub const KEY_LEN: usize = 32;

/// Length of one block in bytes; single-block callers assert that their
/// buffers do not exceed it.
const BLOCK_LEN: usize = 64;
199 | |
#[cfg(test)]
mod tests {
    extern crate alloc;

    use super::*;
    use crate::test;
    use alloc::vec;

    /// Largest `(alignment, offset)` pair exercised by the full sweep.
    const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);

    /// Sweep bounds for the reduced runs: the full bounds when debug
    /// assertions are disabled or the `slow_tests` feature is enabled,
    /// otherwise only alignment 0 and offset 0.
    ///
    /// `debug_assertions` is a name-only cfg, so its "off" state must be
    /// written `not(debug_assertions)`; a `debug_assertions = "false"`
    /// predicate never matches.
    const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) =
        if cfg!(any(not(debug_assertions), feature = "slow_tests")) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            (0, 0)
        };

    #[test]
    fn chacha20_test_default() {
        // Always use `MAX_ALIGNMENT_AND_OFFSET` if we have assembly code.
        let max_offset = if cfg!(any(
            target_arch = "aarch64",
            target_arch = "arm",
            target_arch = "x86",
            target_arch = "x86_64"
        )) {
            MAX_ALIGNMENT_AND_OFFSET
        } else {
            MAX_ALIGNMENT_AND_OFFSET_SUBSET
        };
        chacha20_test(max_offset, Key::encrypt_within);
    }

    // Smoketest the fallback implementation.
    #[test]
    fn chacha20_test_fallback() {
        chacha20_test(MAX_ALIGNMENT_AND_OFFSET_SUBSET, fallback::ChaCha20_ctr32);
    }

    // Verifies the encryption is successful when done on overlapping buffers.
    //
    // On some branches of the 32-bit x86 and ARM assembly code the in-place
    // operation fails in some situations where the input/output buffers are
    // not exactly overlapping. Such failures are dependent not only on the
    // degree of overlapping but also the length of the data. `encrypt_within`
    // works around that.
    fn chacha20_test(
        max_alignment_and_offset: (usize, usize),
        f: impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Reuse a buffer to avoid slowing down the tests with allocations.
        let mut buf = vec![0u8; 1300];

        test::run(test_file!("chacha_tests.txt"), move |section, test_case| {
            assert_eq!(section, "");

            let key = test_case.consume_bytes("Key");
            let key: &[u8; KEY_LEN] = key.as_slice().try_into()?;
            let key = Key::new(*key);

            let ctr = test_case.consume_usize("Ctr");
            let nonce = test_case.consume_bytes("Nonce");
            let input = test_case.consume_bytes("Input");
            let output = test_case.consume_bytes("Output");

            // Run the test case over all prefixes of the input because the
            // behavior of ChaCha20 implementation changes dependent on the
            // length of the input.
            for len in 0..=input.len() {
                // `Ctr` is parsed as `usize` but the counter word is `u32`.
                #[allow(clippy::cast_possible_truncation)]
                chacha20_test_case_inner(
                    &key,
                    &nonce,
                    ctr as u32,
                    &input[..len],
                    &output[..len],
                    &mut buf,
                    max_alignment_and_offset,
                    &f,
                );
            }

            Ok(())
        });
    }

    // Runs `f` on `input` at every alignment in `0..=max_alignment` and every
    // source offset in `0..=max_offset` within `buf`, checking that the front
    // of the buffer equals `expected` each time.
    fn chacha20_test_case_inner(
        key: &Key,
        nonce: &[u8],
        ctr: u32,
        input: &[u8],
        expected: &[u8],
        buf: &mut [u8],
        (max_alignment, max_offset): (usize, usize),
        f: &impl for<'k, 'i> Fn(&'k Key, Counter, &'i mut [u8], RangeFrom<usize>),
    ) {
        // Filler for the bytes in front of the input.
        const ARBITRARY: u8 = 123;

        for alignment in 0..=max_alignment {
            buf[..alignment].fill(ARBITRARY);
            let buf = &mut buf[alignment..];
            for offset in 0..=max_offset {
                // Lay out `offset` filler bytes followed by the input.
                let buf = &mut buf[..(offset + input.len())];
                buf[..offset].fill(ARBITRARY);
                let src = offset..;
                buf[src.clone()].copy_from_slice(input);

                let ctr = Counter::from_nonce_and_ctr(
                    Nonce::try_assume_unique_for_key(nonce).unwrap(),
                    ctr,
                );
                f(key, ctr, buf, src);
                // The output is written starting at the front of the buffer.
                assert_eq!(&buf[..input.len()], expected);
            }
        }
    }
}
316 | |