// Copyright 2019 The CryptoCorrosion Contributors
// Copyright 2020 Developers of the Rand project.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// https://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

9 | |

//! The ChaCha random number generator.

11 | |

12 | use ppv_lite86::{dispatch, dispatch_light128}; |

13 | |

14 | pub use ppv_lite86::Machine; |

15 | use ppv_lite86::{vec128_storage, ArithOps, BitOps32, LaneWords4, MultiLane, StoreBytes, Vec4}; |

16 | |

/// Size in bytes of one ChaCha output block.
pub(crate) const BLOCK: usize = 64;
/// `BLOCK` expressed as a `u64`.
pub(crate) const BLOCK64: u64 = BLOCK as u64;
/// Base-2 logarithm of the number of blocks held by one output buffer.
const LOG2_BUFBLOCKS: u64 = 2;
/// Number of blocks per output buffer (`1 << LOG2_BUFBLOCKS`, i.e. 4).
const BUFBLOCKS: u64 = 1 << LOG2_BUFBLOCKS;
/// Size in bytes of one output buffer as a `u64` (4 blocks of 64 bytes = 256).
pub(crate) const BUFSZ64: u64 = BLOCK64 * BUFBLOCKS;
/// `BUFSZ64` expressed as a `usize`, for use as an array length.
pub(crate) const BUFSZ: usize = BUFSZ64 as usize;

23 | |

// Selectors for the two 64-bit fields kept in row `d`, as consumed by
// `set_stream_param` / `get_stream_param`: selector `p` addresses the 32-bit
// words `2 * p` (low half) and `2 * p + 1` (high half) of that row.
const STREAM_PARAM_BLOCK: u32 = 0;
const STREAM_PARAM_NONCE: u32 = 1;

26 | |

27 | #[derive(Clone, PartialEq, Eq)] |

28 | pub struct ChaCha { |

29 | pub(crate) b: vec128_storage, |

30 | pub(crate) c: vec128_storage, |

31 | pub(crate) d: vec128_storage, |

32 | } |

33 | |

/// Working state used while running rounds: the four matrix rows `a`..`d`, all of the same
/// vector type `V`.
///
/// `V` is a single 128-bit row when one block is processed and a four-lane vector when four
/// blocks are processed at once; `State<vec128_storage>` is used to hand raw rows back to callers.
#[derive(Clone)]
pub struct State<V> {
    /// First row (starts out as the constant vector).
    pub(crate) a: V,
    /// Second row.
    pub(crate) b: V,
    /// Third row.
    pub(crate) c: V,
    /// Fourth row (position / nonce words).
    pub(crate) d: V,
}

41 | |

42 | #[inline(always)] |

43 | pub(crate) fn round<V: ArithOps + BitOps32>(mut x: State<V>) -> State<V> { |

44 | x.a += x.b; |

45 | x.d = (x.d ^ x.a).rotate_each_word_right16(); |

46 | x.c += x.d; |

47 | x.b = (x.b ^ x.c).rotate_each_word_right20(); |

48 | x.a += x.b; |

49 | x.d = (x.d ^ x.a).rotate_each_word_right24(); |

50 | x.c += x.d; |

51 | x.b = (x.b ^ x.c).rotate_each_word_right25(); |

52 | x |

53 | } |

54 | |

55 | #[inline(always)] |

56 | pub(crate) fn diagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { |

57 | x.b = x.b.shuffle_lane_words3012(); |

58 | x.c = x.c.shuffle_lane_words2301(); |

59 | x.d = x.d.shuffle_lane_words1230(); |

60 | x |

61 | } |

62 | #[inline(always)] |

63 | pub(crate) fn undiagonalize<V: LaneWords4>(mut x: State<V>) -> State<V> { |

64 | x.b = x.b.shuffle_lane_words1230(); |

65 | x.c = x.c.shuffle_lane_words2301(); |

66 | x.d = x.d.shuffle_lane_words3012(); |

67 | x |

68 | } |

69 | |

70 | impl ChaCha { |

71 | #[inline(always)] |

72 | pub fn new(key: &[u8; 32], nonce: &[u8]) -> Self { |

73 | init_chacha(key, nonce) |

74 | } |

75 | |

76 | #[inline(always)] |

77 | fn pos64<M: Machine>(&self, m: M) -> u64 { |

78 | let d: M::u32x4 = m.unpack(self.d); |

79 | ((d.extract(1) as u64) << 32) | d.extract(0) as u64 |

80 | } |

81 | |

82 | /// Produce 4 blocks of output, advancing the state |

83 | #[inline(always)] |

84 | pub fn refill4(&mut self, drounds: u32, out: &mut [u8; BUFSZ]) { |

85 | refill_wide(self, drounds, out) |

86 | } |

87 | |

88 | #[inline(always)] |

89 | pub fn set_block_pos(&mut self, value: u64) { |

90 | set_stream_param(self, STREAM_PARAM_BLOCK, value) |

91 | } |

92 | |

93 | #[inline(always)] |

94 | pub fn get_block_pos(&self) -> u64 { |

95 | get_stream_param(self, STREAM_PARAM_BLOCK) |

96 | } |

97 | |

98 | #[inline(always)] |

99 | pub fn set_nonce(&mut self, value: u64) { |

100 | set_stream_param(self, STREAM_PARAM_NONCE, value) |

101 | } |

102 | |

103 | #[inline(always)] |

104 | pub fn get_nonce(&self) -> u64 { |

105 | get_stream_param(self, STREAM_PARAM_NONCE) |

106 | } |

107 | |

108 | #[inline(always)] |

109 | pub fn get_seed(&self) -> [u8; 32] { |

110 | get_seed(self) |

111 | } |

112 | } |

113 | |

114 | #[allow(clippy::many_single_char_names)] |

115 | #[inline(always)] |

116 | fn refill_wide_impl<Mach: Machine>( |

117 | m: Mach, state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ], |

118 | ) { |

119 | let k = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); |

120 | let mut pos = state.pos64(m); |

121 | let d0: Mach::u32x4 = m.unpack(state.d); |

122 | pos = pos.wrapping_add(1); |

123 | let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

124 | pos = pos.wrapping_add(1); |

125 | let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

126 | pos = pos.wrapping_add(1); |

127 | let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

128 | |

129 | let b = m.unpack(state.b); |

130 | let c = m.unpack(state.c); |

131 | let mut x = State { |

132 | a: Mach::u32x4x4::from_lanes([k, k, k, k]), |

133 | b: Mach::u32x4x4::from_lanes([b, b, b, b]), |

134 | c: Mach::u32x4x4::from_lanes([c, c, c, c]), |

135 | d: m.unpack(Mach::u32x4x4::from_lanes([d0, d1, d2, d3]).into()), |

136 | }; |

137 | for _ in 0..drounds { |

138 | x = round(x); |

139 | x = undiagonalize(round(diagonalize(x))); |

140 | } |

141 | let mut pos = state.pos64(m); |

142 | let d0: Mach::u32x4 = m.unpack(state.d); |

143 | pos = pos.wrapping_add(1); |

144 | let d1 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

145 | pos = pos.wrapping_add(1); |

146 | let d2 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

147 | pos = pos.wrapping_add(1); |

148 | let d3 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

149 | pos = pos.wrapping_add(1); |

150 | let d4 = d0.insert((pos >> 32) as u32, 1).insert(pos as u32, 0); |

151 | |

152 | let (a, b, c, d) = ( |

153 | x.a.to_lanes(), |

154 | x.b.to_lanes(), |

155 | x.c.to_lanes(), |

156 | x.d.to_lanes(), |

157 | ); |

158 | let sb = m.unpack(state.b); |

159 | let sc = m.unpack(state.c); |

160 | let sd = [m.unpack(state.d), d1, d2, d3]; |

161 | state.d = d4.into(); |

162 | let mut words = out.chunks_exact_mut(16); |

163 | for ((((&a, &b), &c), &d), &sd) in a.iter().zip(&b).zip(&c).zip(&d).zip(&sd) { |

164 | (a + k).write_le(words.next().unwrap()); |

165 | (b + sb).write_le(words.next().unwrap()); |

166 | (c + sc).write_le(words.next().unwrap()); |

167 | (d + sd).write_le(words.next().unwrap()); |

168 | } |

169 | } |

170 | |

171 | dispatch!(m, Mach, { |

172 | fn refill_wide(state: &mut ChaCha, drounds: u32, out: &mut [u8; BUFSZ]) { |

173 | refill_wide_impl(m, state, drounds, out); |

174 | } |

175 | }); |

176 | |

177 | // Single-block, rounds-only; shared by try_apply_keystream for tails shorter than BUFSZ |

178 | // and XChaCha's setup step. |

179 | dispatch!(m, Mach, { |

180 | fn refill_narrow_rounds(state: &mut ChaCha, drounds: u32) -> State<vec128_storage> { |

181 | let k: Mach::u32x4 = m.vec([0x6170_7865, 0x3320_646e, 0x7962_2d32, 0x6b20_6574]); |

182 | let mut x = State { |

183 | a: k, |

184 | b: m.unpack(state.b), |

185 | c: m.unpack(state.c), |

186 | d: m.unpack(state.d), |

187 | }; |

188 | for _ in 0..drounds { |

189 | x = round(x); |

190 | x = undiagonalize(round(diagonalize(x))); |

191 | } |

192 | State { |

193 | a: x.a.into(), |

194 | b: x.b.into(), |

195 | c: x.c.into(), |

196 | d: x.d.into(), |

197 | } |

198 | } |

199 | }); |

200 | |

201 | dispatch_light128!(m, Mach, { |

202 | fn set_stream_param(state: &mut ChaCha, param: u32, value: u64) { |

203 | let d: Mach::u32x4 = m.unpack(state.d); |

204 | state.d = d |

205 | .insert((value >> 32) as u32, (param << 1) | 1) |

206 | .insert(value as u32, param << 1) |

207 | .into(); |

208 | } |

209 | }); |

210 | |

211 | dispatch_light128!(m, Mach, { |

212 | fn get_stream_param(state: &ChaCha, param: u32) -> u64 { |

213 | let d: Mach::u32x4 = m.unpack(state.d); |

214 | ((d.extract((param << 1) | 1) as u64) << 32) | d.extract(param << 1) as u64 |

215 | } |

216 | }); |

217 | |

218 | dispatch_light128!(m, Mach, { |

219 | fn get_seed(state: &ChaCha) -> [u8; 32] { |

220 | let b: Mach::u32x4 = m.unpack(state.b); |

221 | let c: Mach::u32x4 = m.unpack(state.c); |

222 | let mut key = [0u8; 32]; |

223 | b.write_le(&mut key[..16]); |

224 | c.write_le(&mut key[16..]); |

225 | key |

226 | } |

227 | }); |

228 | |

/// Decodes exactly four bytes as a little-endian `u32` (`xs[0]` is the least significant byte).
///
/// # Panics
///
/// Panics if `xs` is not exactly 4 bytes long.
fn read_u32le(xs: &[u8]) -> u32 {
    assert_eq!(xs.len(), 4);
    u32::from_le_bytes([xs[0], xs[1], xs[2], xs[3]])
}

233 | |

234 | dispatch_light128!(m, Mach, { |

235 | fn init_chacha(key: &[u8; 32], nonce: &[u8]) -> ChaCha { |

236 | let ctr_nonce = [ |

237 | 0, |

238 | if nonce.len() == 12 { |

239 | read_u32le(&nonce[0..4]) |

240 | } else { |

241 | 0 |

242 | }, |

243 | read_u32le(&nonce[nonce.len() - 8..nonce.len() - 4]), |

244 | read_u32le(&nonce[nonce.len() - 4..]), |

245 | ]; |

246 | let key0: Mach::u32x4 = m.read_le(&key[..16]); |

247 | let key1: Mach::u32x4 = m.read_le(&key[16..]); |

248 | ChaCha { |

249 | b: key0.into(), |

250 | c: key1.into(), |

251 | d: ctr_nonce.into(), |

252 | } |

253 | } |

254 | }); |

255 | |

256 | dispatch_light128!(m, Mach, { |

257 | fn init_chacha_x(key: &[u8; 32], nonce: &[u8; 24], rounds: u32) -> ChaCha { |

258 | let key0: Mach::u32x4 = m.read_le(&key[..16]); |

259 | let key1: Mach::u32x4 = m.read_le(&key[16..]); |

260 | let nonce0: Mach::u32x4 = m.read_le(&nonce[..16]); |

261 | let mut state = ChaCha { |

262 | b: key0.into(), |

263 | c: key1.into(), |

264 | d: nonce0.into(), |

265 | }; |

266 | let x = refill_narrow_rounds(&mut state, rounds); |

267 | let ctr_nonce1 = [0, 0, read_u32le(&nonce[16..20]), read_u32le(&nonce[20..24])]; |

268 | state.b = x.a; |

269 | state.c = x.d; |

270 | state.d = ctr_nonce1.into(); |

271 | state |

272 | } |

273 | }); |

274 |