| 1 | // Copyright 2019 The CryptoCorrosion Contributors | 
| 2 | // Copyright 2020 Developers of the Rand project. | 
|---|
| 3 | // | 
|---|
| 4 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|---|
| 5 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|---|
| 6 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your | 
|---|
| 7 | // option. This file may not be copied, modified, or distributed | 
|---|
| 8 | // except according to those terms. | 
|---|
| 9 |  | 
|---|
| 10 | //! The ChaCha random number generator. | 
|---|
| 11 |  | 
|---|
| 12 | use ppv_lite86::{dispatch, dispatch_light128}; | 
|---|
| 13 |  | 
|---|
| 14 | pub use ppv_lite86::Machine; | 
|---|
| 15 | use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4}; | 
|---|
| 16 |  | 
|---|
/// Size in bytes of a single ChaCha output block.
pub(crate) const BLOCK: usize = 64;
/// [`BLOCK`] as a `u64`, for arithmetic on 64-bit positions.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of the number of blocks produced per wide refill.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks produced per wide refill (`2^LOG2_BUFBLOCKS`, i.e. 4).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Size in bytes of the output buffer filled by one wide refill, as a `u64`.
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// Size in bytes of the output buffer filled by one wide refill.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;
|---|
| 23 |  | 
|---|
// Stream parameters are 64-bit values stored in the `d` row of the state.
// Parameter `p` occupies 32-bit words `2 * p` (low half) and `2 * p + 1`
// (high half) of that row.

/// Selects the nonce parameter (words 2 and 3 of `d`).
const STREAM_PARAM_NONCE: u32 = 1;
/// Selects the block-position parameter (words 0 and 1 of `d`).
const STREAM_PARAM_BLOCK: u32 = 0;
|---|
| 26 |  | 
|---|
| 27 | #[ derive(Clone, PartialEq, Eq)] | 
|---|
| 28 | pub struct ChaCha { | 
|---|
| 29 | pub(crate) b: vec128_storage, | 
|---|
| 30 | pub(crate) c: vec128_storage, | 
|---|
| 31 | pub(crate) d: vec128_storage, | 
|---|
| 32 | } | 
|---|
| 33 |  | 
|---|
/// Working state for the round function: the four rows of the ChaCha matrix.
///
/// `V` is the vector type holding one row (or several rows side by side when
/// multiple blocks are processed at once).
#[derive(Clone)]
pub struct State<V> {
    /// First row.
    pub(crate) a: V,
    /// Second row.
    pub(crate) b: V,
    /// Third row.
    pub(crate) c: V,
    /// Fourth row.
    pub(crate) d: V,
}
|---|
| 41 |  | 
|---|
| 42 | #[ inline(always)] | 
|---|
| 43 | pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> { | 
|---|
| 44 | x.a += x.b; | 
|---|
| 45 | x.d = (x.d ^ x.a).rotate_each_word_right16(); | 
|---|
| 46 | x.c += x.d; | 
|---|
| 47 | x.b = (x.b ^ x.c).rotate_each_word_right20(); | 
|---|
| 48 | x.a += x.b; | 
|---|
| 49 | x.d = (x.d ^ x.a).rotate_each_word_right24(); | 
|---|
| 50 | x.c += x.d; | 
|---|
| 51 | x.b = (x.b ^ x.c).rotate_each_word_right25(); | 
|---|
| 52 | x | 
|---|
| 53 | } | 
|---|
| 54 |  | 
|---|
| 55 | #[ inline(always)] | 
|---|
| 56 | pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { | 
|---|
| 57 | x.b = x.b.shuffle_lane_words3012(); | 
|---|
| 58 | x.c = x.c.shuffle_lane_words2301(); | 
|---|
| 59 | x.d = x.d.shuffle_lane_words1230(); | 
|---|
| 60 | x | 
|---|
| 61 | } | 
|---|
| 62 | #[ inline(always)] | 
|---|
| 63 | pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { | 
|---|
| 64 | x.b = x.b.shuffle_lane_words1230(); | 
|---|
| 65 | x.c = x.c.shuffle_lane_words2301(); | 
|---|
| 66 | x.d = x.d.shuffle_lane_words3012(); | 
|---|
| 67 | x | 
|---|
| 68 | } | 
|---|
| 69 |  | 
|---|
| 70 | impl ChaCha { | 
|---|
| 71 | #[ inline(always)] | 
|---|
| 72 | pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self { | 
|---|
| 73 | init_chacha(key, nonce) | 
|---|
| 74 | } | 
|---|
| 75 |  | 
|---|
| 76 | #[ inline(always)] | 
|---|
| 77 | fn pos64<M: Machine>(&self, m: M) -> u64 { | 
|---|
| 78 | let d: M::u32x4 = m.unpack(self.d); | 
|---|
| 79 | ((d.extract(1) as u64) << 32) | d.extract(0) as u64 | 
|---|
| 80 | } | 
|---|
| 81 |  | 
|---|
| 82 | /// Produce 4 blocks of output, advancing the state | 
|---|
| 83 | #[ inline(always)] | 
|---|
| 84 | pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) { | 
|---|
| 85 | refill_wide(self, drounds, out) | 
|---|
| 86 | } | 
|---|
| 87 |  | 
|---|
| 88 | #[ inline(always)] | 
|---|
| 89 | pub fn set_block_pos(&mut self, value: u64) { | 
|---|
| 90 | set_stream_param(self, STREAM_PARAM_BLOCK, value) | 
|---|
| 91 | } | 
|---|
| 92 |  | 
|---|
| 93 | #[ inline(always)] | 
|---|
| 94 | pub fn get_block_pos(&self) -> u64 { | 
|---|
| 95 | get_stream_param(self, STREAM_PARAM_BLOCK) | 
|---|
| 96 | } | 
|---|
| 97 |  | 
|---|
| 98 | #[ inline(always)] | 
|---|
| 99 | pub fn set_nonce(&mut self, value: u64) { | 
|---|
| 100 | set_stream_param(self, STREAM_PARAM_NONCE, value) | 
|---|
| 101 | } | 
|---|
| 102 |  | 
|---|
| 103 | #[ inline(always)] | 
|---|
| 104 | pub fn get_nonce(&self) -> u64 { | 
|---|
| 105 | get_stream_param(self, STREAM_PARAM_NONCE) | 
|---|
| 106 | } | 
|---|
| 107 |  | 
|---|
| 108 | #[ inline(always)] | 
|---|
| 109 | pub fn get_seed(&self) -> [u8; 32] { | 
|---|
| 110 | get_seed(self) | 
|---|
| 111 | } | 
|---|
| 112 | } | 
|---|
| 113 |  | 
|---|
| 114 | #[ allow(clippy::many_single_char_names)] | 
|---|
| 115 | #[ inline(always)] | 
|---|
| 116 | fn refill_wide_impl<Mach: Machine>( | 
|---|
| 117 | m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ], | 
|---|
| 118 | ) { | 
|---|
| 119 | let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); | 
|---|
| 120 | let mut pos = state.pos64(m); | 
|---|
| 121 | let d0: Mach::u32x4 = m.unpack(state.d); | 
|---|
| 122 | pos = pos.wrapping_add(1); | 
|---|
| 123 | let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 124 | pos = pos.wrapping_add(1); | 
|---|
| 125 | let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 126 | pos = pos.wrapping_add(1); | 
|---|
| 127 | let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 128 |  | 
|---|
| 129 | let b = m.unpack(state.b); | 
|---|
| 130 | let c = m.unpack(state.c); | 
|---|
| 131 | let mut x = State { | 
|---|
| 132 | a: Mach::u32x4x4::from_lanes([k, k, k, k]), | 
|---|
| 133 | b: Mach::u32x4x4::from_lanes([b, b, b, b]), | 
|---|
| 134 | c: Mach::u32x4x4::from_lanes([c, c, c, c]), | 
|---|
| 135 | d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()), | 
|---|
| 136 | }; | 
|---|
| 137 | for _ in 0..drounds { | 
|---|
| 138 | x = round(x); | 
|---|
| 139 | x = undiagonalize(round(diagonalize(x))); | 
|---|
| 140 | } | 
|---|
| 141 | let mut pos = state.pos64(m); | 
|---|
| 142 | let d0: Mach::u32x4 = m.unpack(state.d); | 
|---|
| 143 | pos = pos.wrapping_add(1); | 
|---|
| 144 | let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 145 | pos = pos.wrapping_add(1); | 
|---|
| 146 | let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 147 | pos = pos.wrapping_add(1); | 
|---|
| 148 | let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 149 | pos = pos.wrapping_add(1); | 
|---|
| 150 | let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); | 
|---|
| 151 |  | 
|---|
| 152 | let (a, b, c, d) = ( | 
|---|
| 153 | x.a.to_lanes(), | 
|---|
| 154 | x.b.to_lanes(), | 
|---|
| 155 | x.c.to_lanes(), | 
|---|
| 156 | x.d.to_lanes(), | 
|---|
| 157 | ); | 
|---|
| 158 | let sb = m.unpack(state.b); | 
|---|
| 159 | let sc = m.unpack(state.c); | 
|---|
| 160 | let sd = [m.unpack(state.d), d1, d2, d3]; | 
|---|
| 161 | state.d = d4.into(); | 
|---|
| 162 | let mut words = out.chunks_exact_mut(16); | 
|---|
| 163 | for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) { | 
|---|
| 164 | (a + k).write_le(words.next().unwrap()); | 
|---|
| 165 | (b + sb).write_le(words.next().unwrap()); | 
|---|
| 166 | (c + sc).write_le(words.next().unwrap()); | 
|---|
| 167 | (d + sd).write_le(words.next().unwrap()); | 
|---|
| 168 | } | 
|---|
| 169 | } | 
|---|
| 170 |  | 
|---|
| 171 | dispatch!(m, Mach, { | 
|---|
| 172 | fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) { | 
|---|
| 173 | refill_wide_impl(m, state, drounds, out); | 
|---|
| 174 | } | 
|---|
| 175 | }); | 
|---|
| 176 |  | 
|---|
| 177 | // Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ | 
|---|
| 178 | // and XChaCha's setup step. | 
|---|
| 179 | dispatch!(m, Mach, { | 
|---|
| 180 | fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> { | 
|---|
| 181 | let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); | 
|---|
| 182 | let mut x = State { | 
|---|
| 183 | a: k, | 
|---|
| 184 | b: m.unpack(state.b), | 
|---|
| 185 | c: m.unpack(state.c), | 
|---|
| 186 | d: m.unpack(state.d), | 
|---|
| 187 | }; | 
|---|
| 188 | for _ in 0..drounds { | 
|---|
| 189 | x = round(x); | 
|---|
| 190 | x = undiagonalize(round(diagonalize(x))); | 
|---|
| 191 | } | 
|---|
| 192 | State { | 
|---|
| 193 | a: x.a.into(), | 
|---|
| 194 | b: x.b.into(), | 
|---|
| 195 | c: x.c.into(), | 
|---|
| 196 | d: x.d.into(), | 
|---|
| 197 | } | 
|---|
| 198 | } | 
|---|
| 199 | }); | 
|---|
| 200 |  | 
|---|
| 201 | dispatch_light128!(m, Mach, { | 
|---|
| 202 | fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) { | 
|---|
| 203 | let d: Mach::u32x4 = m.unpack(state.d); | 
|---|
| 204 | state.d = d | 
|---|
| 205 | .insert((value >> 32) as u32, (param << 1) | 1) | 
|---|
| 206 | .insert(value as u32, param << 1) | 
|---|
| 207 | .into(); | 
|---|
| 208 | } | 
|---|
| 209 | }); | 
|---|
| 210 |  | 
|---|
| 211 | dispatch_light128!(m, Mach, { | 
|---|
| 212 | fn get_stream_param(state: &ChaCha, param: u32) -> u64 { | 
|---|
| 213 | let d: Mach::u32x4 = m.unpack(state.d); | 
|---|
| 214 | ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64 | 
|---|
| 215 | } | 
|---|
| 216 | }); | 
|---|
| 217 |  | 
|---|
| 218 | dispatch_light128!(m, Mach, { | 
|---|
| 219 | fn get_seed(state: &ChaCha) -> [u8; 32] { | 
|---|
| 220 | let b: Mach::u32x4 = m.unpack(state.b); | 
|---|
| 221 | let c: Mach::u32x4 = m.unpack(state.c); | 
|---|
| 222 | let mut key = [0u8; 32]; | 
|---|
| 223 | b.write_le(&mut key[..16]); | 
|---|
| 224 | c.write_le(&mut key[16..]); | 
|---|
| 225 | key | 
|---|
| 226 | } | 
|---|
| 227 | }); | 
|---|
| 228 |  | 
|---|
/// Decodes a little-endian `u32` from a slice of exactly 4 bytes.
///
/// # Panics
///
/// Panics if `xs.len() != 4`.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}
|---|
| 233 |  | 
|---|
| 234 | dispatch_light128!(m, Mach, { | 
|---|
| 235 | fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha { | 
|---|
| 236 | let ctr_nonce = [ | 
|---|
| 237 | 0, | 
|---|
| 238 | if nonce.len() == 12 { | 
|---|
| 239 | read_u32le(&nonce[0..4]) | 
|---|
| 240 | } else { | 
|---|
| 241 | 0 | 
|---|
| 242 | }, | 
|---|
| 243 | read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]), | 
|---|
| 244 | read_u32le(&nonce[nonce.len() - 4..]), | 
|---|
| 245 | ]; | 
|---|
| 246 | let key0: Mach::u32x4 = m.read_le(&key[..16]); | 
|---|
| 247 | let key1: Mach::u32x4 = m.read_le(&key[16..]); | 
|---|
| 248 | ChaCha { | 
|---|
| 249 | b: key0.into(), | 
|---|
| 250 | c: key1.into(), | 
|---|
| 251 | d: ctr_nonce.into(), | 
|---|
| 252 | } | 
|---|
| 253 | } | 
|---|
| 254 | }); | 
|---|
| 255 |  | 
|---|
| 256 | dispatch_light128!(m, Mach, { | 
|---|
| 257 | fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha { | 
|---|
| 258 | let key0: Mach::u32x4 = m.read_le(&key[..16]); | 
|---|
| 259 | let key1: Mach::u32x4 = m.read_le(&key[16..]); | 
|---|
| 260 | let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]); | 
|---|
| 261 | let mut state = ChaCha { | 
|---|
| 262 | b: key0.into(), | 
|---|
| 263 | c: key1.into(), | 
|---|
| 264 | d: nonce0.into(), | 
|---|
| 265 | }; | 
|---|
| 266 | let x = refill_narrow_rounds(&mut state, rounds); | 
|---|
| 267 | let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])]; | 
|---|
| 268 | state.b = x.a; | 
|---|
| 269 | state.c = x.d; | 
|---|
| 270 | state.d = ctr_nonce1.into(); | 
|---|
| 271 | state | 
|---|
| 272 | } | 
|---|
| 273 | }); | 
|---|
| 274 |  | 
|---|