| 1 | // Copyright 2016 Brian Smith. |
| 2 | // Portions Copyright (c) 2016, Google Inc. |
| 3 | // |
| 4 | // Permission to use, copy, modify, and/or distribute this software for any |
| 5 | // purpose with or without fee is hereby granted, provided that the above |
| 6 | // copyright notice and this permission notice appear in all copies. |
| 7 | // |
| 8 | // THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES |
| 9 | // WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 10 | // MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY |
| 11 | // SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 12 | // WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 13 | // OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 14 | // CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. |
| 15 | |
| 16 | use super::{overlapping, quic::Sample, Nonce}; |
| 17 | use crate::cpu; |
| 18 | use cfg_if::cfg_if; |
| 19 | |
// Selects which implementation modules are compiled for the target.
//
// On little-endian AArch64/ARM and on x86/x86_64 the `ffi` module is built and
// its macros are made visible to this file via `#[macro_use]`. On those
// targets `fallback` is additionally built for x86 (where `Key::encrypt` calls
// it when SSSE3 is not detected) and for test builds. Every other target gets
// only `fallback`.
cfg_if! {
    if #[cfg(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ))] {
        #[macro_use]
        mod ffi;
        #[cfg(any(target_arch = "x86", test))]
        mod fallback;
    } else {
        mod fallback;
    }
}
| 35 | |
| 36 | use crate::polyfill::ArraySplitMap; |
| 37 | |
/// The byte-element (`u8`) instantiation of `overlapping::Overlapping` that
/// is used as the input/output buffer type throughout this module.
pub type Overlapping<'o> = overlapping::Overlapping<'o, u8>;
| 39 | |
| 40 | #[derive (Clone)] |
| 41 | pub struct Key { |
| 42 | words: [u32; KEY_LEN / 4], |
| 43 | } |
| 44 | |
| 45 | impl Key { |
| 46 | pub(super) fn new(value: [u8; KEY_LEN]) -> Self { |
| 47 | Self { |
| 48 | words: value.array_split_map(u32::from_le_bytes), |
| 49 | } |
| 50 | } |
| 51 | } |
| 52 | |
| 53 | impl Key { |
| 54 | // Encrypts `in_out` with the counter 0 and returns counter 1, |
| 55 | // where the counter is derived from the nonce `nonce`. |
| 56 | #[inline ] |
| 57 | pub(super) fn encrypt_single_block_with_ctr_0<const N: usize>( |
| 58 | &self, |
| 59 | nonce: Nonce, |
| 60 | in_out: &mut [u8; N], |
| 61 | cpu: cpu::Features, |
| 62 | ) -> Counter { |
| 63 | assert!(N <= BLOCK_LEN); |
| 64 | let (zero, one) = Counter::zero_one_less_safe(nonce); |
| 65 | self.encrypt(zero, in_out.as_mut().into(), cpu); |
| 66 | one |
| 67 | } |
| 68 | |
| 69 | #[inline ] |
| 70 | pub fn new_mask(&self, sample: Sample) -> [u8; 5] { |
| 71 | let cpu = cpu::features(); // TODO: Remove this. |
| 72 | let (ctr, nonce) = sample.split_at(4); |
| 73 | let ctr = u32::from_le_bytes(ctr.try_into().unwrap()); |
| 74 | let nonce = Nonce::assume_unique_for_key(nonce.try_into().unwrap()); |
| 75 | let ctr = Counter::from_nonce_and_ctr(nonce, ctr); |
| 76 | |
| 77 | let mut out: [u8; 5] = [0; 5]; |
| 78 | self.encrypt(ctr, out.as_mut().into(), cpu); |
| 79 | out |
| 80 | } |
| 81 | |
/// Encrypts `in_out` under this key starting at `counter`, dispatching to the
/// implementation selected for the target.
///
/// The architecture is chosen at compile time by `cfg_if!`. Within an
/// architecture the routine is chosen at run time from `in_out.len()` and the
/// features available in `cpu`. In every assembly-backed branch an input of
/// length zero results in no call at all; the pure-Rust `fallback` branch at
/// the end is called unconditionally.
#[inline(always)]
pub(super) fn encrypt(&self, counter: Counter, in_out: Overlapping<'_>, cpu: cpu::Features) {
    cfg_if! {
        if #[cfg(all(target_arch = "aarch64", target_endian = "little"))] {
            use cpu::{GetFeature as _, arm::Neon};
            // The NEON routine is only tried when `in_out.len()` is at least 193.
            const NEON_MIN_LEN: usize = 192 + 1;
            if in_out.len() >= NEON_MIN_LEN {
                // NOTE(review): the feature queried here is presumably inferred
                // from the `Neon` type named in the macro invocation — confirm
                // against `chacha20_ctr32_ffi!`.
                if let Some(cpu) = cpu.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (NEON_MIN_LEN, Neon, Overlapping<'_>) => ChaCha20_ctr32_neon },
                        self, counter, in_out, cpu);
                }
            }
            // Shorter inputs, or no NEON: use the routine that needs no CPU feature.
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), Overlapping<'_>) => ChaCha20_ctr32_nohw },
                    self, counter, in_out, ())
            }
        } else if #[cfg(all(target_arch = "arm", target_endian = "little"))] {
            use cpu::{GetFeature as _, arm::Neon};
            // Same length threshold as on AArch64.
            const NEON_MIN_LEN: usize = 192 + 1;
            if in_out.len() >= NEON_MIN_LEN {
                if let Some(cpu) = cpu.get_feature() {
                    // Unlike AArch64, the 32-bit routines are given the
                    // `&mut [u8]` produced by `copy_within()`.
                    // NOTE(review): presumably this makes the operation exactly
                    // in-place before the assembly runs — confirm in `overlapping`.
                    return chacha20_ctr32_ffi!(
                        unsafe { (NEON_MIN_LEN, Neon, &mut [u8]) => ChaCha20_ctr32_neon },
                        self, counter, in_out.copy_within(), cpu);
                }
            }
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), &mut [u8]) => ChaCha20_ctr32_nohw },
                    self, counter, in_out.copy_within(), ())
            }
        } else if #[cfg(target_arch = "x86")] {
            use cpu::{GetFeature as _, intel::Ssse3};
            if in_out.len() >= 1 {
                if let Some(cpu) = cpu.get_feature() {
                    // SSSE3 available: assembly routine on the `copy_within()` slice.
                    chacha20_ctr32_ffi!(
                        unsafe { (1, Ssse3, &mut [u8]) => ChaCha20_ctr32_ssse3 },
                        self, counter, in_out.copy_within(), cpu)
                } else {
                    // No SSSE3: pure-Rust fallback. The typed `let _` only
                    // touches `cpu`; it passes nothing to the fallback.
                    let _: cpu::Features = cpu;
                    fallback::ChaCha20_ctr32(self, counter, in_out)
                }
            }
        } else if #[cfg(target_arch = "x86_64")] {
            use cpu::{GetFeature, intel::{Avx2, Ssse3}};
            const SSE_MIN_LEN: usize = 128 + 1; // Also AVX2, SSSE3_4X, SSSE3
            if in_out.len() >= SSE_MIN_LEN {
                let values = cpu.values();
                // Try AVX2 first, then the 4-way SSSE3 routine.
                if let Some(cpu) = values.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (SSE_MIN_LEN, Avx2, Overlapping<'_>) => ChaCha20_ctr32_avx2 },
                        self, counter, in_out, cpu);
                }
                if let Some(cpu) = values.get_feature() {
                    return chacha20_ctr32_ffi!(
                        unsafe { (SSE_MIN_LEN, Ssse3, Overlapping<'_>) =>
                            ChaCha20_ctr32_ssse3_4x },
                        self, counter, in_out, cpu);
                }
            }
            // Short inputs, or neither feature detected.
            if in_out.len() >= 1 {
                chacha20_ctr32_ffi!(
                    unsafe { (1, (), Overlapping<'_>) => ChaCha20_ctr32_nohw },
                    self, counter, in_out, ())
            }
        } else {
            // No assembly for this target: always use the pure-Rust fallback.
            let _: cpu::Features = cpu;
            fallback::ChaCha20_ctr32(self, counter, in_out)
        }
    }
}
| 155 | |
| 156 | #[inline ] |
| 157 | pub(super) fn words_less_safe(&self) -> &[u32; KEY_LEN / 4] { |
| 158 | &self.words |
| 159 | } |
| 160 | } |
| 161 | |
| 162 | /// Counter || Nonce, all native endian. |
| 163 | #[repr (transparent)] |
| 164 | pub struct Counter([u32; 4]); |
| 165 | |
| 166 | impl Counter { |
| 167 | // Nonce-reuse: the caller must only use the first counter (0) for at most |
| 168 | // a single block. |
| 169 | fn zero_one_less_safe(nonce: Nonce) -> (Self, Self) { |
| 170 | let ctr0 @ Self([_, n0, n1, n2]) = Self::from_nonce_and_ctr(nonce, 0); |
| 171 | let ctr1 = Self([1, n0, n1, n2]); |
| 172 | (ctr0, ctr1) |
| 173 | } |
| 174 | |
| 175 | fn from_nonce_and_ctr(nonce: Nonce, ctr: u32) -> Self { |
| 176 | let [n0, n1, n2] = nonce.as_ref().array_split_map(u32::from_le_bytes); |
| 177 | Self([ctr, n0, n1, n2]) |
| 178 | } |
| 179 | |
| 180 | /// This is "less safe" because it hands off management of the counter to |
| 181 | /// the caller. |
| 182 | #[cfg (any( |
| 183 | test, |
| 184 | not(any( |
| 185 | all(target_arch = "aarch64" , target_endian = "little" ), |
| 186 | all(target_arch = "arm" , target_endian = "little" ), |
| 187 | target_arch = "x86_64" |
| 188 | )) |
| 189 | ))] |
| 190 | fn into_words_less_safe(self) -> [u32; 4] { |
| 191 | self.0 |
| 192 | } |
| 193 | } |
| 194 | |
/// Length of a key in bytes; `Key` stores it as `KEY_LEN / 4` 32-bit words.
pub const KEY_LEN: usize = 32;

// Largest buffer length, in bytes, accepted by
// `Key::encrypt_single_block_with_ctr_0`.
const BLOCK_LEN: usize = 64;
| 198 | |
| 199 | #[cfg (test)] |
| 200 | mod tests { |
| 201 | extern crate alloc; |
| 202 | |
| 203 | use super::{super::overlapping::IndexError, *}; |
| 204 | use crate::{error, test}; |
| 205 | use alloc::vec; |
| 206 | |
// Largest (alignment, offset) pair swept by the exhaustive tests.
const MAX_ALIGNMENT_AND_OFFSET: (usize, usize) = (15, 259);

// Limits for the cheaper test runs: the full sweep when debug assertions are
// disabled or the `slow_tests` feature is enabled, otherwise only the
// (0, 0) combination.
const MAX_ALIGNMENT_AND_OFFSET_SUBSET: (usize, usize) = {
    const FULL_SWEEP: bool = cfg!(any(not(debug_assertions), feature = "slow_tests"));
    if FULL_SWEEP {
        MAX_ALIGNMENT_AND_OFFSET
    } else {
        (0, 0)
    }
};
| 214 | |
#[test]
fn chacha20_test_default() {
    // Always use `MAX_ALIGNMENT_AND_OFFSET` if we have assembly code;
    // otherwise the (possibly reduced) subset is enough.
    const HAS_ASSEMBLY: bool = cfg!(any(
        all(target_arch = "aarch64", target_endian = "little"),
        all(target_arch = "arm", target_endian = "little"),
        target_arch = "x86",
        target_arch = "x86_64"
    ));
    let limits = if HAS_ASSEMBLY {
        MAX_ALIGNMENT_AND_OFFSET
    } else {
        MAX_ALIGNMENT_AND_OFFSET_SUBSET
    };
    chacha20_test(limits, Key::encrypt);
}
| 230 | |
// Smoketest the fallback implementation by calling it directly, bypassing
// the dispatch in `Key::encrypt`. The CPU features argument is ignored.
#[test]
fn chacha20_test_fallback() {
    chacha20_test(
        MAX_ALIGNMENT_AND_OFFSET_SUBSET,
        |k, counter, data, _features| fallback::ChaCha20_ctr32(k, counter, data),
    );
}
| 238 | |
| 239 | // Verifies the encryption is successful when done on overlapping buffers. |
| 240 | // |
| 241 | // On some branches of the 32-bit x86 and ARM assembly code the in-place |
| 242 | // operation fails in some situations where the input/output buffers are |
| 243 | // not exactly overlapping. Such failures are dependent not only on the |
| 244 | // degree of overlapping but also the length of the data. `encrypt_within` |
| 245 | // works around that. |
| 246 | fn chacha20_test( |
| 247 | max_alignment_and_offset: (usize, usize), |
| 248 | f: impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>, cpu::Features), |
| 249 | ) { |
| 250 | let cpu = cpu::features(); |
| 251 | |
| 252 | // Reuse a buffer to avoid slowing down the tests with allocations. |
| 253 | let mut buf = vec![0u8; 1300]; |
| 254 | |
| 255 | test::run(test_file!("chacha_tests.txt" ), move |section, test_case| { |
| 256 | assert_eq!(section, "" ); |
| 257 | |
| 258 | let key = test_case .consume_bytes("Key" ); |
| 259 | let key: &[u8; KEY_LEN] = key.as_slice().try_into()?; |
| 260 | let key = Key::new(*key); |
| 261 | |
| 262 | let ctr = test_case .consume_usize("Ctr" ); |
| 263 | let nonce = test_case .consume_bytes("Nonce" ); |
| 264 | let input = test_case .consume_bytes("Input" ); |
| 265 | let output = test_case .consume_bytes("Output" ); |
| 266 | |
| 267 | // Run the test case over all prefixes of the input because the |
| 268 | // behavior of ChaCha20 implementation changes dependent on the |
| 269 | // length of the input. |
| 270 | for len in 0..=input.len() { |
| 271 | #[allow (clippy::cast_possible_truncation)] |
| 272 | chacha20_test_case_inner( |
| 273 | &key, |
| 274 | &nonce, |
| 275 | ctr as u32, |
| 276 | &input[..len], |
| 277 | &output[..len], |
| 278 | &mut buf, |
| 279 | max_alignment_and_offset, |
| 280 | cpu, |
| 281 | &f, |
| 282 | ); |
| 283 | } |
| 284 | |
| 285 | Ok(()) |
| 286 | }); |
| 287 | } |
| 288 | |
| 289 | fn chacha20_test_case_inner( |
| 290 | key: &Key, |
| 291 | nonce: &[u8], |
| 292 | ctr: u32, |
| 293 | input: &[u8], |
| 294 | expected: &[u8], |
| 295 | buf: &mut [u8], |
| 296 | (max_alignment, max_offset): (usize, usize), |
| 297 | cpu: cpu::Features, |
| 298 | f: &impl for<'k, 'o> Fn(&'k Key, Counter, Overlapping<'o>, cpu::Features), |
| 299 | ) { |
| 300 | const ARBITRARY: u8 = 123; |
| 301 | |
| 302 | for alignment in 0..=max_alignment { |
| 303 | buf[..alignment].fill(ARBITRARY); |
| 304 | let buf = &mut buf[alignment..]; |
| 305 | for offset in 0..=max_offset { |
| 306 | let buf = &mut buf[..(offset + input.len())]; |
| 307 | buf[..offset].fill(ARBITRARY); |
| 308 | let src = offset..; |
| 309 | buf[src.clone()].copy_from_slice(input); |
| 310 | |
| 311 | let ctr = Counter::from_nonce_and_ctr( |
| 312 | Nonce::try_assume_unique_for_key(nonce).unwrap(), |
| 313 | ctr, |
| 314 | ); |
| 315 | let in_out = Overlapping::new(buf, src) |
| 316 | .map_err(error::erase::<IndexError>) |
| 317 | .unwrap(); |
| 318 | f(key, ctr, in_out, cpu); |
| 319 | assert_eq!(&buf[..input.len()], expected) |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | } |
| 324 | |