1 | // Copyright 2018 Developers of the Rand project. |
---|---|

2 | // |

3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |

4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |

5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |

6 | // option. This file may not be copied, modified, or distributed |

7 | // except according to those terms. |

8 | |

9 | //! The implementations of the `Standard` distribution for other built-in types. |

10 | |

11 | use core::char; |

12 | use core::num::Wrapping; |

13 | #[cfg(feature = "alloc")] |

14 | use alloc::string::String; |

15 | |

16 | use crate::distributions::{Distribution, Standard, Uniform}; |

17 | #[cfg(feature = "alloc")] |

18 | use crate::distributions::DistString; |

19 | use crate::Rng; |

20 | |

21 | #[cfg(feature = "serde1")] |

22 | use serde::{Serialize, Deserialize}; |

23 | #[cfg(feature = "min_const_gen")] |

24 | use core::mem::{self, MaybeUninit}; |

25 | |

26 | |

27 | // ----- Sampling distributions ----- |

28 | |

29 | /// Sample a `u8`, uniformly distributed over ASCII letters and numbers: |

30 | /// a-z, A-Z and 0-9. |

31 | /// |

32 | /// # Example |

33 | /// |

34 | /// ``` |

35 | /// use rand::{Rng, thread_rng}; |

36 | /// use rand::distributions::Alphanumeric; |

37 | /// |

38 | /// let mut rng = thread_rng(); |

39 | /// let chars: String = (0..7).map(|_| rng.sample(Alphanumeric) as char).collect(); |

40 | /// println!("Random chars: {}", chars); |

41 | /// ``` |

42 | /// |

43 | /// The [`DistString`] trait provides an easier method of generating |

44 | /// a random `String`, and offers more efficient allocation: |

45 | /// ``` |

46 | /// use rand::distributions::{Alphanumeric, DistString}; |

47 | /// let string = Alphanumeric.sample_string(&mut rand::thread_rng(), 16); |

48 | /// println!("Random string: {}", string); |

49 | /// ``` |

50 | /// |

51 | /// # Passwords |

52 | /// |

53 | /// Users sometimes ask whether it is safe to use a string of random characters |

54 | /// as a password. In principle, all RNGs in Rand implementing `CryptoRng` are |

55 | /// suitable as a source of randomness for generating passwords (if they are |

56 | /// properly seeded), but it is more conservative to only use randomness |

57 | /// directly from the operating system via the `getrandom` crate, or the |

58 | /// corresponding bindings of a crypto library. |

59 | /// |

60 | /// When generating passwords or keys, it is important to consider the threat |

61 | /// model and in some cases the memorability of the password. This is out of |

62 | /// scope of the Rand project, and therefore we defer to the following |

63 | /// references: |

64 | /// |

65 | /// - [Wikipedia article on Password Strength](https://en.wikipedia.org/wiki/Password_strength) |

66 | /// - [Diceware for generating memorable passwords](https://en.wikipedia.org/wiki/Diceware) |

67 | #[derive(Debug, Clone, Copy)] |

68 | #[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))] |

69 | pub struct Alphanumeric; |

70 | |

71 | |

72 | // ----- Implementations of distributions ----- |

73 | |

74 | impl Distribution<char> for Standard { |

75 | #[inline] |

76 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { |

77 | // A valid `char` is either in the interval `[0, 0xD800)` or |

78 | // `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in |

79 | // `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is |

80 | // reserved for surrogates. This is the size of that gap. |

81 | const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1; |

82 | |

83 | // Uniform::new(0, 0x11_0000 - GAP_SIZE) can also be used but it |

84 | // seemed slower. |

85 | let range = Uniform::new(GAP_SIZE, 0x11_0000); |

86 | |

87 | let mut n = range.sample(rng); |

88 | if n <= 0xDFFF { |

89 | n -= GAP_SIZE; |

90 | } |

91 | unsafe { char::from_u32_unchecked(n) } |

92 | } |

93 | } |

94 | |

95 | /// Note: the `String` is potentially left with excess capacity; optionally the |

96 | /// user may call `string.shrink_to_fit()` afterwards. |

97 | #[cfg(feature = "alloc")] |

98 | impl DistString for Standard { |

99 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, s: &mut String, len: usize) { |

100 | // A char is encoded with at most four bytes, thus this reservation is |

101 | // guaranteed to be sufficient. We do not shrink_to_fit afterwards so |

102 | // that repeated usage on the same `String` buffer does not reallocate. |

103 | s.reserve(4 * len); |

104 | s.extend(Distribution::<char>::sample_iter(self, rng).take(len)); |

105 | } |

106 | } |

107 | |

108 | impl Distribution<u8> for Alphanumeric { |

109 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { |

110 | const RANGE: u32 = 26 + 26 + 10; |

111 | const GEN_ASCII_STR_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ |

112 | abcdefghijklmnopqrstuvwxyz\ |

113 | 0123456789"; |

114 | // We can pick from 62 characters. This is so close to a power of 2, 64, |

115 | // that we can do better than `Uniform`. Use a simple bitshift and |

116 | // rejection sampling. We do not use a bitmask, because for small RNGs |

117 | // the most significant bits are usually of higher quality. |

118 | loop { |

119 | let var = rng.next_u32() >> (32 - 6); |

120 | if var < RANGE { |

121 | return GEN_ASCII_STR_CHARSET[var as usize]; |

122 | } |

123 | } |

124 | } |

125 | } |

126 | |

127 | #[cfg(feature = "alloc")] |

128 | impl DistString for Alphanumeric { |

129 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { |

130 | unsafe { |

131 | let v = string.as_mut_vec(); |

132 | v.extend(self.sample_iter(rng).take(len)); |

133 | } |

134 | } |

135 | } |

136 | |

137 | impl Distribution<bool> for Standard { |

138 | #[inline] |

139 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> bool { |

140 | // We can compare against an arbitrary bit of an u32 to get a bool. |

141 | // Because the least significant bits of a lower quality RNG can have |

142 | // simple patterns, we compare against the most significant bit. This is |

143 | // easiest done using a sign test. |

144 | (rng.next_u32() as i32) < 0 |

145 | } |

146 | } |

147 | |

148 | macro_rules! tuple_impl { |

149 | // use variables to indicate the arity of the tuple |

150 | ($($tyvar:ident),* ) => { |

151 | // the trailing commas are for the 1 tuple |

152 | impl< $( $tyvar ),* > |

153 | Distribution<( $( $tyvar ),* , )> |

154 | for Standard |

155 | where $( Standard: Distribution<$tyvar> ),* |

156 | { |

157 | #[inline] |

158 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> ( $( $tyvar ),* , ) { |

159 | ( |

160 | // use the $tyvar's to get the appropriate number of |

161 | // repeats (they're not actually needed) |

162 | $( |

163 | _rng.gen::<$tyvar>() |

164 | ),* |

165 | , |

166 | ) |

167 | } |

168 | } |

169 | } |

170 | } |

171 | |

172 | impl Distribution<()> for Standard { |

173 | #[allow(clippy::unused_unit)] |

174 | #[inline] |

175 | fn sample<R: Rng + ?Sized>(&self, _: &mut R) -> () { |

176 | () |

177 | } |

178 | } |

179 | tuple_impl! {A} |

180 | tuple_impl! {A, B} |

181 | tuple_impl! {A, B, C} |

182 | tuple_impl! {A, B, C, D} |

183 | tuple_impl! {A, B, C, D, E} |

184 | tuple_impl! {A, B, C, D, E, F} |

185 | tuple_impl! {A, B, C, D, E, F, G} |

186 | tuple_impl! {A, B, C, D, E, F, G, H} |

187 | tuple_impl! {A, B, C, D, E, F, G, H, I} |

188 | tuple_impl! {A, B, C, D, E, F, G, H, I, J} |

189 | tuple_impl! {A, B, C, D, E, F, G, H, I, J, K} |

190 | tuple_impl! {A, B, C, D, E, F, G, H, I, J, K, L} |

191 | |

192 | #[cfg(feature = "min_const_gen")] |

193 | #[cfg_attr(doc_cfg, doc(cfg(feature = "min_const_gen")))] |

194 | impl<T, const N: usize> Distribution<[T; N]> for Standard |

195 | where Standard: Distribution<T> |

196 | { |

197 | #[inline] |

198 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; N] { |

199 | let mut buff: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() }; |

200 | |

201 | for elem in &mut buff { |

202 | *elem = MaybeUninit::new(_rng.gen()); |

203 | } |

204 | |

205 | unsafe { mem::transmute_copy::<_, _>(&buff) } |

206 | } |

207 | } |

208 | |

209 | #[cfg(not(feature = "min_const_gen"))] |

210 | macro_rules! array_impl { |

211 | // recursive, given at least one type parameter: |

212 | {$n:expr, $t:ident, $($ts:ident,)*} => { |

213 | array_impl!{($n - 1), $($ts,)*} |

214 | |

215 | impl<T> Distribution<[T; $n]> for Standard where Standard: Distribution<T> { |

216 | #[inline] |

217 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; $n] { |

218 | [_rng.gen::<$t>(), $(_rng.gen::<$ts>()),*] |

219 | } |

220 | } |

221 | }; |

222 | // empty case: |

223 | {$n:expr,} => { |

224 | impl<T> Distribution<[T; $n]> for Standard { |

225 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; $n] { [] } |

226 | } |

227 | }; |

228 | } |

229 | |

230 | #[cfg(not(feature = "min_const_gen"))] |

231 | array_impl! {32, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,} |

232 | |

233 | impl<T> Distribution<Option<T>> for Standard |

234 | where Standard: Distribution<T> |

235 | { |

236 | #[inline] |

237 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<T> { |

238 | // UFCS is needed here: https://github.com/rust-lang/rust/issues/24066 |

239 | if rng.gen::<bool>() { |

240 | Some(rng.gen()) |

241 | } else { |

242 | None |

243 | } |

244 | } |

245 | } |

246 | |

247 | impl<T> Distribution<Wrapping<T>> for Standard |

248 | where Standard: Distribution<T> |

249 | { |

250 | #[inline] |

251 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Wrapping<T> { |

252 | Wrapping(rng.gen()) |

253 | } |

254 | } |

255 | |

256 | |

257 | #[cfg(test)] |

258 | mod tests { |

259 | use super::*; |

260 | use crate::RngCore; |

261 | #[cfg(feature = "alloc")] use alloc::string::String; |

262 | |

263 | #[test] |

264 | fn test_misc() { |

265 | let rng: &mut dyn RngCore = &mut crate::test::rng(820); |

266 | |

267 | rng.sample::<char, _>(Standard); |

268 | rng.sample::<bool, _>(Standard); |

269 | } |

270 | |

271 | #[cfg(feature = "alloc")] |

272 | #[test] |

273 | fn test_chars() { |

274 | use core::iter; |

275 | let mut rng = crate::test::rng(805); |

276 | |

277 | // Test by generating a relatively large number of chars, so we also |

278 | // take the rejection sampling path. |

279 | let word: String = iter::repeat(()) |

280 | .map(|()| rng.gen::<char>()) |

281 | .take(1000) |

282 | .collect(); |

283 | assert!(!word.is_empty()); |

284 | } |

285 | |

286 | #[test] |

287 | fn test_alphanumeric() { |

288 | let mut rng = crate::test::rng(806); |

289 | |

290 | // Test by generating a relatively large number of chars, so we also |

291 | // take the rejection sampling path. |

292 | let mut incorrect = false; |

293 | for _ in 0..100 { |

294 | let c: char = rng.sample(Alphanumeric).into(); |

295 | incorrect |= !(('0'..= '9').contains(&c) || |

296 | ('A'..= 'Z').contains(&c) || |

297 | ('a'..= 'z').contains(&c) ); |

298 | } |

299 | assert!(!incorrect); |

300 | } |

301 | |

302 | #[test] |

303 | fn value_stability() { |

304 | fn test_samples<T: Copy + core::fmt::Debug + PartialEq, D: Distribution<T>>( |

305 | distr: &D, zero: T, expected: &[T], |

306 | ) { |

307 | let mut rng = crate::test::rng(807); |

308 | let mut buf = [zero; 5]; |

309 | for x in &mut buf { |

310 | *x = rng.sample(&distr); |

311 | } |

312 | assert_eq!(&buf, expected); |

313 | } |

314 | |

315 | test_samples(&Standard, 'a', &[ |

316 | ' \u{8cdac}', |

317 | ' \u{a346a}', |

318 | ' \u{80120}', |

319 | ' \u{ed692}', |

320 | ' \u{35888}', |

321 | ]); |

322 | test_samples(&Alphanumeric, 0, &[104, 109, 101, 51, 77]); |

323 | test_samples(&Standard, false, &[true, true, false, true, false]); |

324 | test_samples(&Standard, None as Option<bool>, &[ |

325 | Some(true), |

326 | None, |

327 | Some(false), |

328 | None, |

329 | Some(false), |

330 | ]); |

331 | test_samples(&Standard, Wrapping(0i32), &[ |

332 | Wrapping(-2074640887), |

333 | Wrapping(-1719949321), |

334 | Wrapping(2018088303), |

335 | Wrapping(-547181756), |

336 | Wrapping(838957336), |

337 | ]); |

338 | |

339 | // We test only sub-sets of tuple and array impls |

340 | test_samples(&Standard, (), &[(), (), (), (), ()]); |

341 | test_samples(&Standard, (false,), &[ |

342 | (true,), |

343 | (true,), |

344 | (false,), |

345 | (true,), |

346 | (false,), |

347 | ]); |

348 | test_samples(&Standard, (false, false), &[ |

349 | (true, true), |

350 | (false, true), |

351 | (false, false), |

352 | (true, false), |

353 | (false, false), |

354 | ]); |

355 | |

356 | test_samples(&Standard, [0u8; 0], &[[], [], [], [], []]); |

357 | test_samples(&Standard, [0u8; 3], &[ |

358 | [9, 247, 111], |

359 | [68, 24, 13], |

360 | [174, 19, 194], |

361 | [172, 69, 213], |

362 | [149, 207, 29], |

363 | ]); |

364 | } |

365 | } |

366 |