1 | // Copyright 2018 Developers of the Rand project. |
2 | // |
3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
6 | // option. This file may not be copied, modified, or distributed |
7 | // except according to those terms. |
8 | |
9 | //! The implementations of the `StandardUniform` distribution for other built-in types. |
10 | |
11 | #[cfg (feature = "alloc" )] |
12 | use alloc::string::String; |
13 | use core::array; |
14 | use core::char; |
15 | use core::num::Wrapping; |
16 | |
17 | #[cfg (feature = "alloc" )] |
18 | use crate::distr::SampleString; |
19 | use crate::distr::{Distribution, StandardUniform, Uniform}; |
20 | use crate::Rng; |
21 | |
22 | #[cfg (feature = "simd_support" )] |
23 | use core::simd::prelude::*; |
24 | #[cfg (feature = "simd_support" )] |
25 | use core::simd::{LaneCount, MaskElement, SupportedLaneCount}; |
26 | #[cfg (feature = "serde" )] |
27 | use serde::{Deserialize, Serialize}; |
28 | |
29 | // ----- Sampling distributions ----- |
30 | |
31 | /// Sample a `u8`, uniformly distributed over ASCII letters and numbers: |
32 | /// a-z, A-Z and 0-9. |
33 | /// |
34 | /// # Example |
35 | /// |
36 | /// ``` |
37 | /// use rand::Rng; |
38 | /// use rand::distr::Alphanumeric; |
39 | /// |
40 | /// let mut rng = rand::rng(); |
41 | /// let chars: String = (0..7).map(|_| rng.sample(Alphanumeric) as char).collect(); |
42 | /// println!("Random chars: {}" , chars); |
43 | /// ``` |
44 | /// |
45 | /// The [`SampleString`] trait provides an easier method of generating |
46 | /// a random [`String`], and offers more efficient allocation: |
47 | /// ``` |
48 | /// use rand::distr::{Alphanumeric, SampleString}; |
49 | /// let string = Alphanumeric.sample_string(&mut rand::rng(), 16); |
50 | /// println!("Random string: {}" , string); |
51 | /// ``` |
52 | /// |
53 | /// # Passwords |
54 | /// |
55 | /// Users sometimes ask whether it is safe to use a string of random characters |
56 | /// as a password. In principle, all RNGs in Rand implementing `CryptoRng` are |
57 | /// suitable as a source of randomness for generating passwords (if they are |
58 | /// properly seeded), but it is more conservative to only use randomness |
59 | /// directly from the operating system via the `getrandom` crate, or the |
60 | /// corresponding bindings of a crypto library. |
61 | /// |
62 | /// When generating passwords or keys, it is important to consider the threat |
63 | /// model and in some cases the memorability of the password. This is out of |
64 | /// scope of the Rand project, and therefore we defer to the following |
65 | /// references: |
66 | /// |
67 | /// - [Wikipedia article on Password Strength](https://en.wikipedia.org/wiki/Password_strength) |
68 | /// - [Diceware for generating memorable passwords](https://en.wikipedia.org/wiki/Diceware) |
69 | #[derive(Debug, Clone, Copy, Default)] |
70 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
71 | pub struct Alphanumeric; |
72 | |
73 | /// Sample a [`u8`], uniformly distributed over letters: |
74 | /// a-z and A-Z. |
75 | /// |
76 | /// # Example |
77 | /// |
78 | /// You're able to generate random Alphabetic characters via mapping or via the |
79 | /// [`SampleString::sample_string`] method like so: |
80 | /// |
81 | /// ``` |
82 | /// use rand::Rng; |
83 | /// use rand::distr::{Alphabetic, SampleString}; |
84 | /// |
85 | /// // Manual mapping |
86 | /// let mut rng = rand::rng(); |
87 | /// let chars: String = (0..7).map(|_| rng.sample(Alphabetic) as char).collect(); |
88 | /// println!("Random chars: {}" , chars); |
89 | /// |
90 | /// // Using [`SampleString::sample_string`] |
91 | /// let string = Alphabetic.sample_string(&mut rand::rng(), 16); |
92 | /// println!("Random string: {}" , string); |
93 | /// ``` |
94 | /// |
95 | /// # Passwords |
96 | /// |
97 | /// Refer to [`Alphanumeric#Passwords`]. |
98 | #[derive(Debug, Clone, Copy, Default)] |
99 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
100 | pub struct Alphabetic; |
101 | |
102 | // ----- Implementations of distributions ----- |
103 | |
104 | impl Distribution<char> for StandardUniform { |
105 | #[inline ] |
106 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { |
107 | // A valid `char` is either in the interval `[0, 0xD800)` or |
108 | // `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in |
109 | // `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is |
110 | // reserved for surrogates. This is the size of that gap. |
111 | const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1; |
112 | |
113 | // Uniform::new(0, 0x11_0000 - GAP_SIZE) can also be used, but it |
114 | // seemed slower. |
115 | let range = Uniform::new(GAP_SIZE, high:0x11_0000).unwrap(); |
116 | |
117 | let mut n = range.sample(rng); |
118 | if n <= 0xDFFF { |
119 | n -= GAP_SIZE; |
120 | } |
121 | // SAFETY: We ensure above that `n` represents a `char`. |
122 | unsafe { char::from_u32_unchecked(n) } |
123 | } |
124 | } |
125 | |
126 | #[cfg (feature = "alloc" )] |
127 | impl SampleString for StandardUniform { |
128 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, s: &mut String, len: usize) { |
129 | // A char is encoded with at most four bytes, thus this reservation is |
130 | // guaranteed to be sufficient. We do not shrink_to_fit afterwards so |
131 | // that repeated usage on the same `String` buffer does not reallocate. |
132 | s.reserve(4 * len); |
133 | s.extend(Distribution::<char>::sample_iter(self, rng).take(len)); |
134 | } |
135 | } |
136 | |
137 | impl Distribution<u8> for Alphanumeric { |
138 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { |
139 | const RANGE: u32 = 26 + 26 + 10; |
140 | const GEN_ASCII_STR_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ |
141 | abcdefghijklmnopqrstuvwxyz\ |
142 | 0123456789" ; |
143 | // We can pick from 62 characters. This is so close to a power of 2, 64, |
144 | // that we can do better than `Uniform`. Use a simple bitshift and |
145 | // rejection sampling. We do not use a bitmask, because for small RNGs |
146 | // the most significant bits are usually of higher quality. |
147 | loop { |
148 | let var: u32 = rng.next_u32() >> (32 - 6); |
149 | if var < RANGE { |
150 | return GEN_ASCII_STR_CHARSET[var as usize]; |
151 | } |
152 | } |
153 | } |
154 | } |
155 | |
156 | impl Distribution<u8> for Alphabetic { |
157 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { |
158 | const RANGE: u8 = 26 + 26; |
159 | |
160 | let offset = rng.random_range(0..RANGE) + b'A' ; |
161 | |
162 | // Account for upper-cases |
163 | offset + (offset > b'Z' ) as u8 * (b'a' - b'Z' - 1) |
164 | } |
165 | } |
166 | |
167 | #[cfg (feature = "alloc" )] |
168 | impl SampleString for Alphanumeric { |
169 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { |
170 | // SAFETY: `self` only samples alphanumeric characters, which are valid UTF-8. |
171 | unsafe { |
172 | let v = string.as_mut_vec(); |
173 | v.extend( |
174 | self.sample_iter(rng) |
175 | .take(len) |
176 | .inspect(|b| debug_assert!(b.is_ascii_alphanumeric())), |
177 | ); |
178 | } |
179 | } |
180 | } |
181 | |
182 | #[cfg (feature = "alloc" )] |
183 | impl SampleString for Alphabetic { |
184 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { |
185 | // SAFETY: With this distribution we guarantee that we're working with valid ASCII |
186 | // characters. |
187 | // See [#1590](https://github.com/rust-random/rand/issues/1590). |
188 | unsafe { |
189 | let v = string.as_mut_vec(); |
190 | v.reserve_exact(len); |
191 | v.extend(self.sample_iter(rng).take(len)); |
192 | } |
193 | } |
194 | } |
195 | |
196 | impl Distribution<bool> for StandardUniform { |
197 | #[inline ] |
198 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> bool { |
199 | // We can compare against an arbitrary bit of an u32 to get a bool. |
200 | // Because the least significant bits of a lower quality RNG can have |
201 | // simple patterns, we compare against the most significant bit. This is |
202 | // easiest done using a sign test. |
203 | (rng.next_u32() as i32) < 0 |
204 | } |
205 | } |
206 | |
207 | /// Note that on some hardware like x86/64 mask operations like [`_mm_blendv_epi8`] |
208 | /// only care about a single bit. This means that you could use uniform random bits |
209 | /// directly: |
210 | /// |
211 | /// ```ignore |
212 | /// // this may be faster... |
213 | /// let x = unsafe { _mm_blendv_epi8(a.into(), b.into(), rng.random::<__m128i>()) }; |
214 | /// |
215 | /// // ...than this |
216 | /// let x = rng.random::<mask8x16>().select(b, a); |
217 | /// ``` |
218 | /// |
219 | /// Since most bits are unused you could also generate only as many bits as you need, i.e.: |
220 | /// ``` |
221 | /// #![feature(portable_simd)] |
222 | /// use std::simd::prelude::*; |
223 | /// use rand::prelude::*; |
224 | /// let mut rng = rand::rng(); |
225 | /// |
226 | /// let x = u16x8::splat(rng.random::<u8>() as u16); |
227 | /// let mask = u16x8::splat(1) << u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]); |
228 | /// let rand_mask = (x & mask).simd_eq(mask); |
229 | /// ``` |
230 | /// |
231 | /// [`_mm_blendv_epi8`]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8&ig_expand=514/ |
232 | /// [`simd_support`]: https://github.com/rust-random/rand#crate-features |
233 | #[cfg (feature = "simd_support" )] |
234 | impl<T, const LANES: usize> Distribution<Mask<T, LANES>> for StandardUniform |
235 | where |
236 | T: MaskElement + Default, |
237 | LaneCount<LANES>: SupportedLaneCount, |
238 | StandardUniform: Distribution<Simd<T, LANES>>, |
239 | Simd<T, LANES>: SimdPartialOrd<Mask = Mask<T, LANES>>, |
240 | { |
241 | #[inline ] |
242 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Mask<T, LANES> { |
243 | // `MaskElement` must be a signed integer, so this is equivalent |
244 | // to the scalar `i32 < 0` method |
245 | let var = rng.random::<Simd<T, LANES>>(); |
246 | var.simd_lt(Simd::default()) |
247 | } |
248 | } |
249 | |
250 | /// Implement `Distribution<(A, B, C, ...)> for StandardUniform`, using the list of |
251 | /// identifiers |
252 | macro_rules! tuple_impl { |
253 | ($($tyvar:ident)*) => { |
254 | impl< $($tyvar,)* > Distribution<($($tyvar,)*)> for StandardUniform |
255 | where $( |
256 | StandardUniform: Distribution< $tyvar >, |
257 | )* |
258 | { |
259 | #[inline] |
260 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ( $($tyvar,)* ) { |
261 | let out = ($( |
262 | // use the $tyvar's to get the appropriate number of |
263 | // repeats (they're not actually needed) |
264 | rng.random::<$tyvar>() |
265 | ,)*); |
266 | |
267 | // Suppress the unused variable warning for empty tuple |
268 | let _rng = rng; |
269 | |
270 | out |
271 | } |
272 | } |
273 | } |
274 | } |
275 | |
276 | /// Looping wrapper for `tuple_impl`. Given (A, B, C), it also generates |
277 | /// implementations for (A, B) and (A,) |
278 | macro_rules! tuple_impls { |
279 | ($($tyvar:ident)*) => {tuple_impls!{[] $($tyvar)*}}; |
280 | |
281 | ([$($prefix:ident)*] $head:ident $($tail:ident)*) => { |
282 | tuple_impl!{$($prefix)*} |
283 | tuple_impls!{[$($prefix)* $head] $($tail)*} |
284 | }; |
285 | |
286 | |
287 | ([$($prefix:ident)*]) => { |
288 | tuple_impl!{$($prefix)*} |
289 | }; |
290 | |
291 | } |
292 | |
293 | tuple_impls! {A B C D E F G H I J K L} |
294 | |
295 | impl<T, const N: usize> Distribution<[T; N]> for StandardUniform |
296 | where |
297 | StandardUniform: Distribution<T>, |
298 | { |
299 | #[inline ] |
300 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> [T; N] { |
301 | array::from_fn(|_| rng.random()) |
302 | } |
303 | } |
304 | |
305 | impl<T> Distribution<Wrapping<T>> for StandardUniform |
306 | where |
307 | StandardUniform: Distribution<T>, |
308 | { |
309 | #[inline ] |
310 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Wrapping<T> { |
311 | Wrapping(rng.random()) |
312 | } |
313 | } |
314 | |
315 | #[cfg (test)] |
316 | mod tests { |
317 | use super::*; |
318 | use crate::RngCore; |
319 | |
320 | #[test] |
321 | fn test_misc() { |
322 | let rng: &mut dyn RngCore = &mut crate::test::rng(820); |
323 | |
324 | rng.sample::<char, _>(StandardUniform); |
325 | rng.sample::<bool, _>(StandardUniform); |
326 | } |
327 | |
328 | #[cfg (feature = "alloc" )] |
329 | #[test] |
330 | fn test_chars() { |
331 | use core::iter; |
332 | let mut rng = crate::test::rng(805); |
333 | |
334 | // Test by generating a relatively large number of chars, so we also |
335 | // take the rejection sampling path. |
336 | let word: String = iter::repeat(()) |
337 | .map(|()| rng.random::<char>()) |
338 | .take(1000) |
339 | .collect(); |
340 | assert!(!word.is_empty()); |
341 | } |
342 | |
343 | #[test] |
344 | fn test_alphanumeric() { |
345 | let mut rng = crate::test::rng(806); |
346 | |
347 | // Test by generating a relatively large number of chars, so we also |
348 | // take the rejection sampling path. |
349 | let mut incorrect = false; |
350 | for _ in 0..100 { |
351 | let c: char = rng.sample(Alphanumeric).into(); |
352 | incorrect |= !c.is_ascii_alphanumeric(); |
353 | } |
354 | assert!(!incorrect); |
355 | } |
356 | |
357 | #[test] |
358 | fn test_alphabetic() { |
359 | let mut rng = crate::test::rng(806); |
360 | |
361 | // Test by generating a relatively large number of chars, so we also |
362 | // take the rejection sampling path. |
363 | let mut incorrect = false; |
364 | for _ in 0..100 { |
365 | let c: char = rng.sample(Alphabetic).into(); |
366 | incorrect |= !c.is_ascii_alphabetic(); |
367 | } |
368 | assert!(!incorrect); |
369 | } |
370 | |
371 | #[test] |
372 | fn value_stability() { |
373 | fn test_samples<T: Copy + core::fmt::Debug + PartialEq, D: Distribution<T>>( |
374 | distr: &D, |
375 | zero: T, |
376 | expected: &[T], |
377 | ) { |
378 | let mut rng = crate::test::rng(807); |
379 | let mut buf = [zero; 5]; |
380 | for x in &mut buf { |
381 | *x = rng.sample(distr); |
382 | } |
383 | assert_eq!(&buf, expected); |
384 | } |
385 | |
386 | test_samples( |
387 | &StandardUniform, |
388 | 'a' , |
389 | &[ |
390 | ' \u{8cdac}' , |
391 | ' \u{a346a}' , |
392 | ' \u{80120}' , |
393 | ' \u{ed692}' , |
394 | ' \u{35888}' , |
395 | ], |
396 | ); |
397 | test_samples(&Alphanumeric, 0, &[104, 109, 101, 51, 77]); |
398 | test_samples(&Alphabetic, 0, &[97, 102, 89, 116, 75]); |
399 | test_samples(&StandardUniform, false, &[true, true, false, true, false]); |
400 | test_samples( |
401 | &StandardUniform, |
402 | Wrapping(0i32), |
403 | &[ |
404 | Wrapping(-2074640887), |
405 | Wrapping(-1719949321), |
406 | Wrapping(2018088303), |
407 | Wrapping(-547181756), |
408 | Wrapping(838957336), |
409 | ], |
410 | ); |
411 | |
412 | // We test only sub-sets of tuple and array impls |
413 | test_samples(&StandardUniform, (), &[(), (), (), (), ()]); |
414 | test_samples( |
415 | &StandardUniform, |
416 | (false,), |
417 | &[(true,), (true,), (false,), (true,), (false,)], |
418 | ); |
419 | test_samples( |
420 | &StandardUniform, |
421 | (false, false), |
422 | &[ |
423 | (true, true), |
424 | (false, true), |
425 | (false, false), |
426 | (true, false), |
427 | (false, false), |
428 | ], |
429 | ); |
430 | |
431 | test_samples(&StandardUniform, [0u8; 0], &[[], [], [], [], []]); |
432 | test_samples( |
433 | &StandardUniform, |
434 | [0u8; 3], |
435 | &[ |
436 | [9, 247, 111], |
437 | [68, 24, 13], |
438 | [174, 19, 194], |
439 | [172, 69, 213], |
440 | [149, 207, 29], |
441 | ], |
442 | ); |
443 | } |
444 | } |
445 | |