1 | // Copyright 2018 Developers of the Rand project. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! The implementations of the `Standard` distribution for other built-in types. |
10 | |
11 | use core::char; |
12 | use core::num::Wrapping; |
13 | #[cfg (feature = "alloc" )] |
14 | use alloc::string::String; |
15 | |
16 | use crate::distributions::{Distribution, Standard, Uniform}; |
17 | #[cfg (feature = "alloc" )] |
18 | use crate::distributions::DistString; |
19 | use crate::Rng; |
20 | |
21 | #[cfg (feature = "serde1" )] |
22 | use serde::{Serialize, Deserialize}; |
23 | #[cfg (feature = "min_const_gen" )] |
24 | use core::mem::{self, MaybeUninit}; |
25 | |
26 | |
27 | // ----- Sampling distributions ----- |
28 | |
29 | /// Sample a `u8`, uniformly distributed over ASCII letters and numbers: |
30 | /// a-z, A-Z and 0-9. |
31 | /// |
32 | /// # Example |
33 | /// |
34 | /// ``` |
35 | /// use rand::{Rng, thread_rng}; |
36 | /// use rand::distributions::Alphanumeric; |
37 | /// |
38 | /// let mut rng = thread_rng(); |
39 | /// let chars: String = (0..7).map(|_| rng.sample(Alphanumeric) as char).collect(); |
40 | /// println!("Random chars: {}" , chars); |
41 | /// ``` |
42 | /// |
43 | /// The [`DistString`] trait provides an easier method of generating |
44 | /// a random `String`, and offers more efficient allocation: |
45 | /// ``` |
46 | /// use rand::distributions::{Alphanumeric, DistString}; |
47 | /// let string = Alphanumeric.sample_string(&mut rand::thread_rng(), 16); |
48 | /// println!("Random string: {}" , string); |
49 | /// ``` |
50 | /// |
51 | /// # Passwords |
52 | /// |
53 | /// Users sometimes ask whether it is safe to use a string of random characters |
54 | /// as a password. In principle, all RNGs in Rand implementing `CryptoRng` are |
55 | /// suitable as a source of randomness for generating passwords (if they are |
56 | /// properly seeded), but it is more conservative to only use randomness |
57 | /// directly from the operating system via the `getrandom` crate, or the |
58 | /// corresponding bindings of a crypto library. |
59 | /// |
60 | /// When generating passwords or keys, it is important to consider the threat |
61 | /// model and in some cases the memorability of the password. This is out of |
62 | /// scope of the Rand project, and therefore we defer to the following |
63 | /// references: |
64 | /// |
65 | /// - [Wikipedia article on Password Strength](https://en.wikipedia.org/wiki/Password_strength) |
66 | /// - [Diceware for generating memorable passwords](https://en.wikipedia.org/wiki/Diceware) |
67 | #[derive(Debug, Clone, Copy)] |
68 | #[cfg_attr (feature = "serde1" , derive(Serialize, Deserialize))] |
69 | pub struct Alphanumeric; |
70 | |
71 | |
72 | // ----- Implementations of distributions ----- |
73 | |
74 | impl Distribution<char> for Standard { |
75 | #[inline ] |
76 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { |
77 | // A valid `char` is either in the interval `[0, 0xD800)` or |
78 | // `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in |
79 | // `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is |
80 | // reserved for surrogates. This is the size of that gap. |
81 | const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1; |
82 | |
83 | // Uniform::new(0, 0x11_0000 - GAP_SIZE) can also be used but it |
84 | // seemed slower. |
85 | let range = Uniform::new(GAP_SIZE, 0x11_0000); |
86 | |
87 | let mut n = range.sample(rng); |
88 | if n <= 0xDFFF { |
89 | n -= GAP_SIZE; |
90 | } |
91 | unsafe { char::from_u32_unchecked(n) } |
92 | } |
93 | } |
94 | |
95 | /// Note: the `String` is potentially left with excess capacity; optionally the |
96 | /// user may call `string.shrink_to_fit()` afterwards. |
97 | #[cfg (feature = "alloc" )] |
98 | impl DistString for Standard { |
99 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, s: &mut String, len: usize) { |
100 | // A char is encoded with at most four bytes, thus this reservation is |
101 | // guaranteed to be sufficient. We do not shrink_to_fit afterwards so |
102 | // that repeated usage on the same `String` buffer does not reallocate. |
103 | s.reserve(4 * len); |
104 | s.extend(Distribution::<char>::sample_iter(self, rng).take(len)); |
105 | } |
106 | } |
107 | |
108 | impl Distribution<u8> for Alphanumeric { |
109 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { |
110 | const RANGE: u32 = 26 + 26 + 10; |
111 | const GEN_ASCII_STR_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ |
112 | abcdefghijklmnopqrstuvwxyz\ |
113 | 0123456789" ; |
114 | // We can pick from 62 characters. This is so close to a power of 2, 64, |
115 | // that we can do better than `Uniform`. Use a simple bitshift and |
116 | // rejection sampling. We do not use a bitmask, because for small RNGs |
117 | // the most significant bits are usually of higher quality. |
118 | loop { |
119 | let var = rng.next_u32() >> (32 - 6); |
120 | if var < RANGE { |
121 | return GEN_ASCII_STR_CHARSET[var as usize]; |
122 | } |
123 | } |
124 | } |
125 | } |
126 | |
127 | #[cfg (feature = "alloc" )] |
128 | impl DistString for Alphanumeric { |
129 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { |
130 | unsafe { |
131 | let v = string.as_mut_vec(); |
132 | v.extend(self.sample_iter(rng).take(len)); |
133 | } |
134 | } |
135 | } |
136 | |
137 | impl Distribution<bool> for Standard { |
138 | #[inline ] |
139 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> bool { |
140 | // We can compare against an arbitrary bit of an u32 to get a bool. |
141 | // Because the least significant bits of a lower quality RNG can have |
142 | // simple patterns, we compare against the most significant bit. This is |
143 | // easiest done using a sign test. |
144 | (rng.next_u32() as i32) < 0 |
145 | } |
146 | } |
147 | |
148 | macro_rules! tuple_impl { |
149 | // use variables to indicate the arity of the tuple |
150 | ($($tyvar:ident),* ) => { |
151 | // the trailing commas are for the 1 tuple |
152 | impl< $( $tyvar ),* > |
153 | Distribution<( $( $tyvar ),* , )> |
154 | for Standard |
155 | where $( Standard: Distribution<$tyvar> ),* |
156 | { |
157 | #[inline] |
158 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> ( $( $tyvar ),* , ) { |
159 | ( |
160 | // use the $tyvar's to get the appropriate number of |
161 | // repeats (they're not actually needed) |
162 | $( |
163 | _rng.gen::<$tyvar>() |
164 | ),* |
165 | , |
166 | ) |
167 | } |
168 | } |
169 | } |
170 | } |
171 | |
172 | impl Distribution<()> for Standard { |
173 | #[allow (clippy::unused_unit)] |
174 | #[inline ] |
175 | fn sample<R: Rng + ?Sized>(&self, _: &mut R) -> () { |
176 | () |
177 | } |
178 | } |
179 | tuple_impl! {A} |
180 | tuple_impl! {A, B} |
181 | tuple_impl! {A, B, C} |
182 | tuple_impl! {A, B, C, D} |
183 | tuple_impl! {A, B, C, D, E} |
184 | tuple_impl! {A, B, C, D, E, F} |
185 | tuple_impl! {A, B, C, D, E, F, G} |
186 | tuple_impl! {A, B, C, D, E, F, G, H} |
187 | tuple_impl! {A, B, C, D, E, F, G, H, I} |
188 | tuple_impl! {A, B, C, D, E, F, G, H, I, J} |
189 | tuple_impl! {A, B, C, D, E, F, G, H, I, J, K} |
190 | tuple_impl! {A, B, C, D, E, F, G, H, I, J, K, L} |
191 | |
192 | #[cfg (feature = "min_const_gen" )] |
193 | #[cfg_attr (doc_cfg, doc(cfg(feature = "min_const_gen" )))] |
194 | impl<T, const N: usize> Distribution<[T; N]> for Standard |
195 | where Standard: Distribution<T> |
196 | { |
197 | #[inline ] |
198 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; N] { |
199 | let mut buff: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() }; |
200 | |
201 | for elem in &mut buff { |
202 | *elem = MaybeUninit::new(_rng.gen()); |
203 | } |
204 | |
205 | unsafe { mem::transmute_copy::<_, _>(&buff) } |
206 | } |
207 | } |
208 | |
209 | #[cfg (not(feature = "min_const_gen" ))] |
210 | macro_rules! array_impl { |
211 | // recursive, given at least one type parameter: |
212 | {$n:expr, $t:ident, $($ts:ident,)*} => { |
213 | array_impl!{($n - 1), $($ts,)*} |
214 | |
215 | impl<T> Distribution<[T; $n]> for Standard where Standard: Distribution<T> { |
216 | #[inline] |
217 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; $n] { |
218 | [_rng.gen::<$t>(), $(_rng.gen::<$ts>()),*] |
219 | } |
220 | } |
221 | }; |
222 | // empty case: |
223 | {$n:expr,} => { |
224 | impl<T> Distribution<[T; $n]> for Standard { |
225 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; $n] { [] } |
226 | } |
227 | }; |
228 | } |
229 | |
230 | #[cfg (not(feature = "min_const_gen" ))] |
231 | array_impl! {32, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,} |
232 | |
233 | impl<T> Distribution<Option<T>> for Standard |
234 | where Standard: Distribution<T> |
235 | { |
236 | #[inline ] |
237 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Option<T> { |
238 | // UFCS is needed here: https://github.com/rust-lang/rust/issues/24066 |
239 | if rng.gen::<bool>() { |
240 | Some(rng.gen()) |
241 | } else { |
242 | None |
243 | } |
244 | } |
245 | } |
246 | |
247 | impl<T> Distribution<Wrapping<T>> for Standard |
248 | where Standard: Distribution<T> |
249 | { |
250 | #[inline ] |
251 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Wrapping<T> { |
252 | Wrapping(rng.gen()) |
253 | } |
254 | } |
255 | |
256 | |
257 | #[cfg (test)] |
258 | mod tests { |
259 | use super::*; |
260 | use crate::RngCore; |
261 | #[cfg (feature = "alloc" )] use alloc::string::String; |
262 | |
263 | #[test] |
264 | fn test_misc() { |
265 | let rng: &mut dyn RngCore = &mut crate::test::rng(820); |
266 | |
267 | rng.sample::<char, _>(Standard); |
268 | rng.sample::<bool, _>(Standard); |
269 | } |
270 | |
271 | #[cfg (feature = "alloc" )] |
272 | #[test] |
273 | fn test_chars() { |
274 | use core::iter; |
275 | let mut rng = crate::test::rng(805); |
276 | |
277 | // Test by generating a relatively large number of chars, so we also |
278 | // take the rejection sampling path. |
279 | let word: String = iter::repeat(()) |
280 | .map(|()| rng.gen::<char>()) |
281 | .take(1000) |
282 | .collect(); |
283 | assert!(!word.is_empty()); |
284 | } |
285 | |
286 | #[test] |
287 | fn test_alphanumeric() { |
288 | let mut rng = crate::test::rng(806); |
289 | |
290 | // Test by generating a relatively large number of chars, so we also |
291 | // take the rejection sampling path. |
292 | let mut incorrect = false; |
293 | for _ in 0..100 { |
294 | let c: char = rng.sample(Alphanumeric).into(); |
295 | incorrect |= !(('0' ..='9' ).contains(&c) || |
296 | ('A' ..='Z' ).contains(&c) || |
297 | ('a' ..='z' ).contains(&c) ); |
298 | } |
299 | assert!(!incorrect); |
300 | } |
301 | |
302 | #[test] |
303 | fn value_stability() { |
304 | fn test_samples<T: Copy + core::fmt::Debug + PartialEq, D: Distribution<T>>( |
305 | distr: &D, zero: T, expected: &[T], |
306 | ) { |
307 | let mut rng = crate::test::rng(807); |
308 | let mut buf = [zero; 5]; |
309 | for x in &mut buf { |
310 | *x = rng.sample(&distr); |
311 | } |
312 | assert_eq!(&buf, expected); |
313 | } |
314 | |
315 | test_samples(&Standard, 'a' , &[ |
316 | ' \u{8cdac}' , |
317 | ' \u{a346a}' , |
318 | ' \u{80120}' , |
319 | ' \u{ed692}' , |
320 | ' \u{35888}' , |
321 | ]); |
322 | test_samples(&Alphanumeric, 0, &[104, 109, 101, 51, 77]); |
323 | test_samples(&Standard, false, &[true, true, false, true, false]); |
324 | test_samples(&Standard, None as Option<bool>, &[ |
325 | Some(true), |
326 | None, |
327 | Some(false), |
328 | None, |
329 | Some(false), |
330 | ]); |
331 | test_samples(&Standard, Wrapping(0i32), &[ |
332 | Wrapping(-2074640887), |
333 | Wrapping(-1719949321), |
334 | Wrapping(2018088303), |
335 | Wrapping(-547181756), |
336 | Wrapping(838957336), |
337 | ]); |
338 | |
339 | // We test only sub-sets of tuple and array impls |
340 | test_samples(&Standard, (), &[(), (), (), (), ()]); |
341 | test_samples(&Standard, (false,), &[ |
342 | (true,), |
343 | (true,), |
344 | (false,), |
345 | (true,), |
346 | (false,), |
347 | ]); |
348 | test_samples(&Standard, (false, false), &[ |
349 | (true, true), |
350 | (false, true), |
351 | (false, false), |
352 | (true, false), |
353 | (false, false), |
354 | ]); |
355 | |
356 | test_samples(&Standard, [0u8; 0], &[[], [], [], [], []]); |
357 | test_samples(&Standard, [0u8; 3], &[ |
358 | [9, 247, 111], |
359 | [68, 24, 13], |
360 | [174, 19, 194], |
361 | [172, 69, 213], |
362 | [149, 207, 29], |
363 | ]); |
364 | } |
365 | } |
366 | |