1 | // Copyright 2016 Brian Smith. |
2 | // Portions Copyright (c) 2016, Google Inc. |
3 | // |
4 | // Permission to use, copy, modify, and/or distribute this software for any |
5 | // purpose with or without fee is hereby granted, provided that the above |
6 | // copyright notice and this permission notice appear in all copies. |
7 | // |
8 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
9 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
10 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
11 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
12 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
13 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
14 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
15 | |
16 | use super::{overlapping, quic::Sample, Nonce}; |
17 | use crate::cpu; |
18 | use cfg_if::cfg_if; |
19 | |
// Select which implementation modules are compiled for the target.
//
// On the four architectures listed below, `ffi` is compiled with
// `#[macro_use]` so that its macros are usable in this module (presumably
// this is where `chacha20_ctr32_ffi!`, used by `Key::encrypt`, comes from).
// There, `fallback` is additionally compiled only on x86, where
// `Key::encrypt` calls it when SSSE3 is not available, and in test builds,
// where the tests call it directly. On every other target `fallback` is the
// only implementation module.
cfg_if! {
    if #[cfg(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ))] {
        #[macro_use]
        mod ffi;
        #[cfg(any(target_arch = "x86", test))]
        mod fallback;
    } else {
        mod fallback;
    }
}
35 | |
36 | use crate::polyfill::ArraySplitMap; |
37 | |
/// Byte-element specialization of [`overlapping::Overlapping`]; this is the
/// in/out buffer type accepted by `Key::encrypt`.
pub type Overlapping<'o> = overlapping::Overlapping<'o, u8>;
39 | |
/// A ChaCha20 key, held as `KEY_LEN / 4` 32-bit words.
///
/// `Key::new` produces the words by decoding the key bytes as little-endian
/// `u32`s.
#[derive(Clone)]
pub struct Key {
    // The key material as words; exposed by reference through
    // `words_less_safe`.
    words: [u32; KEY_LEN / 4],
}
44 | |
45 | impl Key { |
46 | pub(super) fn new(value: [u8; KEY_LEN]) -> Self { |
47 | Self { |
48 | words: value.array_split_map(u32::from_le_bytes), |
49 | } |
50 | } |
51 | } |
52 | |
53 | impl Key { |
54 | // Encrypts `in_out` with the counter 0 and returns counter 1, |
55 | // where the counter is derived from the nonce `nonce`. |
56 | #[inline ] |
57 | pub(super) fn encrypt_single_block_with_ctr_0<const N: usize>( |
58 | &self, |
59 | nonce: Nonce, |
60 | in_out: &mut [u8; N], |
61 | cpu: cpu::Features, |
62 | ) -> Counter { |
63 | assert!(N <= BLOCK_LEN); |
64 | let (zero, one) = Counter::zero_one_less_safe(nonce); |
65 | self.encrypt(zero, in_out.as_mut().into(), cpu); |
66 | one |
67 | } |
68 | |
69 | #[inline ] |
70 | pub fn new_mask(&self, sample: Sample) -> [u8; 5] { |
71 | let cpu = cpu::features(); // TODO: Remove this. |
72 | let (ctr, nonce) = sample.split_at(4); |
73 | let ctr = u32::from_le_bytes(ctr.try_into().unwrap()); |
74 | let nonce = Nonce::assume_unique_for_key(nonce.try_into().unwrap()); |
75 | let ctr = Counter::from_nonce_and_ctr(nonce, ctr); |
76 | |
77 | let mut out: [u8; 5] = [0; 5]; |
78 | self.encrypt(ctr, out.as_mut().into(), cpu); |
79 | out |
80 | } |
81 | |
/// Encrypts `in_out` under this key with `counter`, dispatching to the
/// implementation chosen for the target architecture.
///
/// `cfg_if!` keeps exactly one branch at compile time. Within a branch the
/// routine is picked at run time from `in_out.len()` and the features
/// reported by `cpu`:
///
/// * little-endian AArch64: `ChaCha20_ctr32_neon` for inputs of at least
///   193 bytes when NEON is available, otherwise `ChaCha20_ctr32_nohw`.
/// * little-endian 32-bit ARM: the same selection, but the routines receive
///   the `&mut [u8]` produced by `in_out.copy_within()`.
/// * x86: `ChaCha20_ctr32_ssse3` (on `in_out.copy_within()`) when SSSE3 is
///   available, otherwise `fallback::ChaCha20_ctr32`.
/// * x86_64: for inputs of at least 129 bytes, `ChaCha20_ctr32_avx2`, or
///   else `ChaCha20_ctr32_ssse3_4x`, when the respective feature is
///   available; otherwise `ChaCha20_ctr32_nohw`.
/// * any other target: `fallback::ChaCha20_ctr32`.
///
/// On the four listed architectures an empty `in_out` results in no call at
/// all.
///
/// NOTE(review): the first element of each `(len, feature, type)` tuple
/// passed to `chacha20_ctr32_ffi!` looks like the minimum input length the
/// routine requires — confirm against the macro definition.
#[inline(always)]
pub(super) fn encrypt(&self, counter: Counter, in_out: Overlapping<'_>, cpu: cpu::Features) {
    cfg_if! {
        if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
            use cpu::{GetFeature as _, arm::Neon};
            // The NEON routine is only used for inputs longer than 192 bytes.
            const NEON_MIN_LEN: usize = 192 + 1;
            if in_out.len() >= NEON_MIN_LEN {
                if let Some(cpu) = cpu.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (NEON_MIN_LEN, Neon, Overlapping<'_>) => ChaCha20_ctr32_neon },
                        self, counter, in_out, cpu);
                }
            }
            // Shorter inputs, or no NEON: use the routine that needs no
            // feature token. Empty inputs are skipped entirely.
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), Overlapping<'_>) => ChaCha20_ctr32_nohw },
                    self, counter, in_out, ())
            }
        } else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
            use cpu::{GetFeature as _, arm::Neon};
            const NEON_MIN_LEN: usize = 192 + 1;
            // Same selection as AArch64, except that the routines are handed
            // the `&mut [u8]` from `copy_within()` rather than the
            // `Overlapping` itself.
            if in_out.len() >= NEON_MIN_LEN {
                if let Some(cpu) = cpu.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (NEON_MIN_LEN, Neon, &mut [u8]) => ChaCha20_ctr32_neon },
                        self, counter, in_out.copy_within(), cpu);
                }
            }
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), &mut [u8]) => ChaCha20_ctr32_nohw },
                    self, counter, in_out.copy_within(), ())
            }
        } else if #[cfg(target_arch = "x86")] {
            use cpu::{GetFeature as _, intel::Ssse3};
            if in_out.len() >= 1 {
                if let Some(cpu) = cpu.get_feature() {
                    chacha20_ctr32_ffi!(
                        unsafe { (1, Ssse3, &mut [u8]) => ChaCha20_ctr32_ssse3 },
                        self, counter, in_out.copy_within(), cpu)
                } else {
                    // No SSSE3: use the Rust fallback. `cpu` is not otherwise
                    // consumed on this path; the typed `let _` presumably
                    // just marks it as used.
                    let _: cpu::Features = cpu;
                    fallback::ChaCha20_ctr32(self, counter, in_out)
                }
            }
        } else if #[cfg(target_arch = "x86_64")] {
            use cpu::{GetFeature, intel::{Avx2, Ssse3}};
            const SSE_MIN_LEN: usize = 128 + 1; // Also AVX2, SSSE3_4X, SSSE3
            if in_out.len() >= SSE_MIN_LEN {
                let values = cpu.values();
                // The feature each `get_feature()` call asks for appears to be
                // inferred from how its result is used: `Avx2` for the first
                // call, `Ssse3` for the second.
                if let Some(cpu) = values.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (SSE_MIN_LEN, Avx2, Overlapping<'_>) => ChaCha20_ctr32_avx2 },
                        self, counter, in_out, cpu);
                }
                if let Some(cpu) = values.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (SSE_MIN_LEN, Ssse3, Overlapping<'_>) =>
                            ChaCha20_ctr32_ssse3_4x },
                        self, counter, in_out, cpu);
                }
            }
            // Short inputs, or neither feature available.
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), Overlapping<'_>) => ChaCha20_ctr32_nohw },
                    self, counter, in_out, ())
            }
        } else {
            // Every other target: the Rust fallback is the only
            // implementation; `cpu` is not otherwise consumed here.
            let _: cpu::Features = cpu;
            fallback::ChaCha20_ctr32(self, counter, in_out)
        }
    }
}
155 | |
156 | #[inline ] |
157 | pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] { |
158 | &self.words |
159 | } |
160 | } |
161 | |
/// Counter || Nonce, all native endian.
///
/// Word 0 is the 32-bit counter and words 1..=3 are the nonce decoded as
/// little-endian `u32`s (see `from_nonce_and_ctr`). `#[repr(transparent)]`
/// gives the type the same layout as `[u32; 4]`.
#[repr(transparent)]
pub struct Counter([u32; 4]);
165 | |
166 | impl Counter { |
167 | // Nonce-reuse: the caller must only use the first counter (0) for at most |
168 | // a single block. |
169 | fn zero_one_less_safe(nonce: Nonce) -> (Self, Self) { |
170 | let ctr0 @ Self([_, n0, n1, n2]) = Self::from_nonce_and_ctr(nonce, 0); |
171 | let ctr1 = Self([1, n0, n1, n2]); |
172 | (ctr0, ctr1) |
173 | } |
174 | |
175 | fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self { |
176 | let [n0, n1, n2] = nonce.as_ref().array_split_map(u32::from_le_bytes); |
177 | Self([ctr, n0, n1, n2]) |
178 | } |
179 | |
/// Consumes the counter and returns its four raw words.
///
/// This is "less safe" because the caller takes over management of the
/// counter from here on.
///
/// Only compiled for test builds and for targets other than the three
/// listed below.
#[cfg(any(
    test,
    not(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86_64"
    ))
))]
fn into_words_less_safe(self) -> [u32; 4] {
    let Self(words) = self;
    words
}
193 | } |
194 | |
/// Length in bytes of the key accepted by `Key::new`.
pub const KEY_LEN: usize = 32;

/// Upper bound, in bytes, on the input accepted by
/// `Key::encrypt_single_block_with_ctr_0` (one block).
const BLOCK_LEN: usize = 64;
198 | |
199 | #[cfg (test)] |
200 | mod tests { |
201 | extern crate alloc; |
202 | |
203 | use super::{super::overlapping::IndexError, *}; |
204 | use crate::{error, test}; |
205 | use alloc::vec; |
206 | |
// Inclusive upper bounds `(alignment, offset)` for the buffer layouts swept
// by `chacha20_test_case_inner`.
const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);
// The full range when built without debug assertions or with the
// `slow_tests` feature enabled; otherwise only the single `(0, 0)` layout
// (presumably to keep debug test runs fast).
const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) =
    if cfg!(any(not(debug_assertions), feature = "slow_tests")) {
        MAX_ALIGNMENT_AND_OFFSET
    } else {
        (0, 0)
    };
214 | |
#[test]
fn chacha20_test_default() {
    // Always sweep the full alignment/offset range if we have assembly code;
    // other targets use the (possibly reduced) subset.
    let has_assembly = cfg!(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ));
    let max_alignment_and_offset = if has_assembly {
        MAX_ALIGNMENT_AND_OFFSET
    } else {
        MAX_ALIGNMENT_AND_OFFSET_SUBSET
    };

    chacha20_test(max_alignment_and_offset, Key::encrypt);
}
230 | |
// Smoke-tests the fallback implementation by calling it directly instead of
// going through `Key::encrypt`.
#[test]
fn chacha20_test_fallback() {
    // Adapter with the signature `chacha20_test` expects; the CPU features
    // are ignored.
    fn via_fallback(key: &Key, ctr: Counter, in_out: Overlapping<'_>, _cpu: cpu::Features) {
        fallback::ChaCha20_ctr32(key, ctr, in_out)
    }

    chacha20_test(MAX_ALIGNMENT_AND_OFFSET_SUBSET, via_fallback);
}
238 | |
239 | // Verifies the encryption is successful when done on overlapping buffers. |
240 | // |
241 | // On some branches of the 32-bit x86 and ARM assembly code the in-place |
242 | // operation fails in some situations where the input/output buffers are |
243 | // not exactly overlapping. Such failures are dependent not only on the |
244 | // degree of overlapping but also the length of the data. `encrypt_within` |
245 | // works around that. |
246 | fn chacha20_test( |
247 | max_alignment_and_offset: (usize, usize), |
248 | f: impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>, cpu::Features), |
249 | ) { |
250 | let cpu = cpu::features(); |
251 | |
252 | // Reuse a buffer to avoid slowing down the tests with allocations. |
253 | let mut buf = vec![0u8; 1300]; |
254 | |
255 | test::run(test_file!("chacha_tests.txt" ), move |section, test_case| { |
256 | assert_eq!(section, "" ); |
257 | |
258 | let key = test_case .consume_bytes("Key" ); |
259 | let key: &[u8; KEY_LEN] = key.as_slice().try_into()?; |
260 | let key = Key::new(*key); |
261 | |
262 | let ctr = test_case .consume_usize("Ctr" ); |
263 | let nonce = test_case .consume_bytes("Nonce" ); |
264 | let input = test_case .consume_bytes("Input" ); |
265 | let output = test_case .consume_bytes("Output" ); |
266 | |
267 | // Run the test case over all prefixes of the input because the |
268 | // behavior of ChaCha20 implementation changes dependent on the |
269 | // length of the input. |
270 | for len in 0..=input.len() { |
271 | #[allow (clippy::cast_possible_truncation)] |
272 | chacha20_test_case_inner( |
273 | &key, |
274 | &nonce, |
275 | ctr as u32, |
276 | &input[..len], |
277 | &output[..len], |
278 | &mut buf, |
279 | max_alignment_and_offset, |
280 | cpu, |
281 | &f, |
282 | ); |
283 | } |
284 | |
285 | Ok(()) |
286 | }); |
287 | } |
288 | |
289 | fn chacha20_test_case_inner( |
290 | key: &Key, |
291 | nonce: &[u8], |
292 | ctr: u32, |
293 | input: &[u8], |
294 | expected: &[u8], |
295 | buf: &mut [u8], |
296 | (max_alignment, max_offset): (usize, usize), |
297 | cpu: cpu::Features, |
298 | f: &impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>, cpu::Features), |
299 | ) { |
300 | const ARBITRARY: u8 = 123; |
301 | |
302 | for alignment in 0..=max_alignment { |
303 | buf[..alignment].fill(ARBITRARY); |
304 | let buf = &mut buf[alignment..]; |
305 | for offset in 0..=max_offset { |
306 | let buf = &mut buf[..(offset + input.len())]; |
307 | buf[..offset].fill(ARBITRARY); |
308 | let src = offset..; |
309 | buf[src.clone()].copy_from_slice(input); |
310 | |
311 | let ctr = Counter::from_nonce_and_ctr( |
312 | Nonce::try_assume_unique_for_key(nonce).unwrap(), |
313 | ctr, |
314 | ); |
315 | let in_out = Overlapping::new(buf, src) |
316 | .map_err(error::erase::<IndexError>) |
317 | .unwrap(); |
318 | f(key, ctr, in_out, cpu); |
319 | assert_eq!(&buf[..input.len()], expected) |
320 | } |
321 | } |
322 | } |
323 | } |
324 | |