| 1 | // Copyright 2018 Developers of the Rand project. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | //! The implementations of the `StandardUniform` distribution for other built-in types. |
| 10 | |
| 11 | #[cfg (feature = "alloc" )] |
| 12 | use alloc::string::String; |
| 13 | use core::char; |
| 14 | use core::num::Wrapping; |
| 15 | |
| 16 | #[cfg (feature = "alloc" )] |
| 17 | use crate::distr::SampleString; |
| 18 | use crate::distr::{Distribution, StandardUniform, Uniform}; |
| 19 | use crate::Rng; |
| 20 | |
| 21 | use core::mem::{self, MaybeUninit}; |
| 22 | #[cfg (feature = "simd_support" )] |
| 23 | use core::simd::prelude::*; |
| 24 | #[cfg (feature = "simd_support" )] |
| 25 | use core::simd::{LaneCount, MaskElement, SupportedLaneCount}; |
| 26 | #[cfg (feature = "serde" )] |
| 27 | use serde::{Deserialize, Serialize}; |
| 28 | |
| 29 | // ----- Sampling distributions ----- |
| 30 | |
| 31 | /// Sample a `u8`, uniformly distributed over ASCII letters and numbers: |
| 32 | /// a-z, A-Z and 0-9. |
| 33 | /// |
| 34 | /// # Example |
| 35 | /// |
| 36 | /// ``` |
| 37 | /// use rand::Rng; |
| 38 | /// use rand::distr::Alphanumeric; |
| 39 | /// |
| 40 | /// let mut rng = rand::rng(); |
| 41 | /// let chars: String = (0..7).map(|_| rng.sample(Alphanumeric) as char).collect(); |
| 42 | /// println!("Random chars: {}" , chars); |
| 43 | /// ``` |
| 44 | /// |
| 45 | /// The [`SampleString`] trait provides an easier method of generating |
| 46 | /// a random [`String`], and offers more efficient allocation: |
| 47 | /// ``` |
| 48 | /// use rand::distr::{Alphanumeric, SampleString}; |
| 49 | /// let string = Alphanumeric.sample_string(&mut rand::rng(), 16); |
| 50 | /// println!("Random string: {}" , string); |
| 51 | /// ``` |
| 52 | /// |
| 53 | /// # Passwords |
| 54 | /// |
| 55 | /// Users sometimes ask whether it is safe to use a string of random characters |
| 56 | /// as a password. In principle, all RNGs in Rand implementing `CryptoRng` are |
| 57 | /// suitable as a source of randomness for generating passwords (if they are |
| 58 | /// properly seeded), but it is more conservative to only use randomness |
| 59 | /// directly from the operating system via the `getrandom` crate, or the |
| 60 | /// corresponding bindings of a crypto library. |
| 61 | /// |
| 62 | /// When generating passwords or keys, it is important to consider the threat |
| 63 | /// model and in some cases the memorability of the password. This is out of |
| 64 | /// scope of the Rand project, and therefore we defer to the following |
| 65 | /// references: |
| 66 | /// |
| 67 | /// - [Wikipedia article on Password Strength](https://en.wikipedia.org/wiki/Password_strength) |
| 68 | /// - [Diceware for generating memorable passwords](https://en.wikipedia.org/wiki/Diceware) |
| 69 | #[derive (Debug, Clone, Copy, Default)] |
| 70 | #[cfg_attr (feature = "serde" , derive(Serialize, Deserialize))] |
| 71 | pub struct Alphanumeric; |
| 72 | |
| 73 | // ----- Implementations of distributions ----- |
| 74 | |
| 75 | impl Distribution<char> for StandardUniform { |
| 76 | #[inline ] |
| 77 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> char { |
| 78 | // A valid `char` is either in the interval `[0, 0xD800)` or |
| 79 | // `(0xDFFF, 0x11_0000)`. All `char`s must therefore be in |
| 80 | // `[0, 0x11_0000)` but not in the "gap" `[0xD800, 0xDFFF]` which is |
| 81 | // reserved for surrogates. This is the size of that gap. |
| 82 | const GAP_SIZE: u32 = 0xDFFF - 0xD800 + 1; |
| 83 | |
| 84 | // Uniform::new(0, 0x11_0000 - GAP_SIZE) can also be used, but it |
| 85 | // seemed slower. |
| 86 | let range: Uniform = Uniform::new(GAP_SIZE, high:0x11_0000).unwrap(); |
| 87 | |
| 88 | let mut n: u32 = range.sample(rng); |
| 89 | if n <= 0xDFFF { |
| 90 | n -= GAP_SIZE; |
| 91 | } |
| 92 | unsafe { char::from_u32_unchecked(n) } |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | #[cfg (feature = "alloc" )] |
| 97 | impl SampleString for StandardUniform { |
| 98 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, s: &mut String, len: usize) { |
| 99 | // A char is encoded with at most four bytes, thus this reservation is |
| 100 | // guaranteed to be sufficient. We do not shrink_to_fit afterwards so |
| 101 | // that repeated usage on the same `String` buffer does not reallocate. |
| 102 | s.reserve(additional:4 * len); |
| 103 | s.extend(iter:Distribution::<char>::sample_iter(self, rng).take(len)); |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | impl Distribution<u8> for Alphanumeric { |
| 108 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> u8 { |
| 109 | const RANGE: u32 = 26 + 26 + 10; |
| 110 | const GEN_ASCII_STR_CHARSET: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ |
| 111 | abcdefghijklmnopqrstuvwxyz\ |
| 112 | 0123456789" ; |
| 113 | // We can pick from 62 characters. This is so close to a power of 2, 64, |
| 114 | // that we can do better than `Uniform`. Use a simple bitshift and |
| 115 | // rejection sampling. We do not use a bitmask, because for small RNGs |
| 116 | // the most significant bits are usually of higher quality. |
| 117 | loop { |
| 118 | let var: u32 = rng.next_u32() >> (32 - 6); |
| 119 | if var < RANGE { |
| 120 | return GEN_ASCII_STR_CHARSET[var as usize]; |
| 121 | } |
| 122 | } |
| 123 | } |
| 124 | } |
| 125 | |
| 126 | #[cfg (feature = "alloc" )] |
| 127 | impl SampleString for Alphanumeric { |
| 128 | fn append_string<R: Rng + ?Sized>(&self, rng: &mut R, string: &mut String, len: usize) { |
| 129 | unsafe { |
| 130 | let v: &mut Vec = string.as_mut_vec(); |
| 131 | v.extend(self.sample_iter(rng).take(len)); |
| 132 | } |
| 133 | } |
| 134 | } |
| 135 | |
| 136 | impl Distribution<bool> for StandardUniform { |
| 137 | #[inline ] |
| 138 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> bool { |
| 139 | // We can compare against an arbitrary bit of an u32 to get a bool. |
| 140 | // Because the least significant bits of a lower quality RNG can have |
| 141 | // simple patterns, we compare against the most significant bit. This is |
| 142 | // easiest done using a sign test. |
| 143 | (rng.next_u32() as i32) < 0 |
| 144 | } |
| 145 | } |
| 146 | |
| 147 | /// Note that on some hardware like x86/64 mask operations like [`_mm_blendv_epi8`] |
| 148 | /// only care about a single bit. This means that you could use uniform random bits |
| 149 | /// directly: |
| 150 | /// |
| 151 | /// ```ignore |
| 152 | /// // this may be faster... |
| 153 | /// let x = unsafe { _mm_blendv_epi8(a.into(), b.into(), rng.random::<__m128i>()) }; |
| 154 | /// |
| 155 | /// // ...than this |
| 156 | /// let x = rng.random::<mask8x16>().select(b, a); |
| 157 | /// ``` |
| 158 | /// |
| 159 | /// Since most bits are unused you could also generate only as many bits as you need, i.e.: |
| 160 | /// ``` |
| 161 | /// #![feature(portable_simd)] |
| 162 | /// use std::simd::prelude::*; |
| 163 | /// use rand::prelude::*; |
| 164 | /// let mut rng = rand::rng(); |
| 165 | /// |
| 166 | /// let x = u16x8::splat(rng.random::<u8>() as u16); |
| 167 | /// let mask = u16x8::splat(1) << u16x8::from([0, 1, 2, 3, 4, 5, 6, 7]); |
| 168 | /// let rand_mask = (x & mask).simd_eq(mask); |
| 169 | /// ``` |
| 170 | /// |
| 171 | /// [`_mm_blendv_epi8`]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_blendv_epi8&ig_expand=514/ |
| 172 | /// [`simd_support`]: https://github.com/rust-random/rand#crate-features |
| 173 | #[cfg (feature = "simd_support" )] |
| 174 | impl<T, const LANES: usize> Distribution<Mask<T, LANES>> for StandardUniform |
| 175 | where |
| 176 | T: MaskElement + Default, |
| 177 | LaneCount<LANES>: SupportedLaneCount, |
| 178 | StandardUniform: Distribution<Simd<T, LANES>>, |
| 179 | Simd<T, LANES>: SimdPartialOrd<Mask = Mask<T, LANES>>, |
| 180 | { |
| 181 | #[inline ] |
| 182 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Mask<T, LANES> { |
| 183 | // `MaskElement` must be a signed integer, so this is equivalent |
| 184 | // to the scalar `i32 < 0` method |
| 185 | let var = rng.random::<Simd<T, LANES>>(); |
| 186 | var.simd_lt(Simd::default()) |
| 187 | } |
| 188 | } |
| 189 | |
| 190 | /// Implement `Distribution<(A, B, C, ...)> for StandardUniform`, using the list of |
| 191 | /// identifiers |
| 192 | macro_rules! tuple_impl { |
| 193 | ($($tyvar:ident)*) => { |
| 194 | impl< $($tyvar,)* > Distribution<($($tyvar,)*)> for StandardUniform |
| 195 | where $( |
| 196 | StandardUniform: Distribution< $tyvar >, |
| 197 | )* |
| 198 | { |
| 199 | #[inline] |
| 200 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> ( $($tyvar,)* ) { |
| 201 | let out = ($( |
| 202 | // use the $tyvar's to get the appropriate number of |
| 203 | // repeats (they're not actually needed) |
| 204 | rng.random::<$tyvar>() |
| 205 | ,)*); |
| 206 | |
| 207 | // Suppress the unused variable warning for empty tuple |
| 208 | let _rng = rng; |
| 209 | |
| 210 | out |
| 211 | } |
| 212 | } |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | /// Looping wrapper for `tuple_impl`. Given (A, B, C), it also generates |
| 217 | /// implementations for (A, B) and (A,) |
| 218 | macro_rules! tuple_impls { |
| 219 | ($($tyvar:ident)*) => {tuple_impls!{[] $($tyvar)*}}; |
| 220 | |
| 221 | ([$($prefix:ident)*] $head:ident $($tail:ident)*) => { |
| 222 | tuple_impl!{$($prefix)*} |
| 223 | tuple_impls!{[$($prefix)* $head] $($tail)*} |
| 224 | }; |
| 225 | |
| 226 | |
| 227 | ([$($prefix:ident)*]) => { |
| 228 | tuple_impl!{$($prefix)*} |
| 229 | }; |
| 230 | |
| 231 | } |
| 232 | |
| 233 | tuple_impls! {A B C D E F G H I J K L} |
| 234 | |
| 235 | impl<T, const N: usize> Distribution<[T; N]> for StandardUniform |
| 236 | where |
| 237 | StandardUniform: Distribution<T>, |
| 238 | { |
| 239 | #[inline ] |
| 240 | fn sample<R: Rng + ?Sized>(&self, _rng: &mut R) -> [T; N] { |
| 241 | let mut buff: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() }; |
| 242 | |
| 243 | for elem: &mut MaybeUninit in &mut buff { |
| 244 | *elem = MaybeUninit::new(val:_rng.random()); |
| 245 | } |
| 246 | |
| 247 | unsafe { mem::transmute_copy::<_, _>(&buff) } |
| 248 | } |
| 249 | } |
| 250 | |
| 251 | impl<T> Distribution<Wrapping<T>> for StandardUniform |
| 252 | where |
| 253 | StandardUniform: Distribution<T>, |
| 254 | { |
| 255 | #[inline ] |
| 256 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> Wrapping<T> { |
| 257 | Wrapping(rng.random()) |
| 258 | } |
| 259 | } |
| 260 | |
| 261 | #[cfg (test)] |
| 262 | mod tests { |
| 263 | use super::*; |
| 264 | use crate::RngCore; |
| 265 | |
| 266 | #[test ] |
| 267 | fn test_misc() { |
| 268 | let rng: &mut dyn RngCore = &mut crate::test::rng(820); |
| 269 | |
| 270 | rng.sample::<char, _>(StandardUniform); |
| 271 | rng.sample::<bool, _>(StandardUniform); |
| 272 | } |
| 273 | |
| 274 | #[cfg (feature = "alloc" )] |
| 275 | #[test ] |
| 276 | fn test_chars() { |
| 277 | use core::iter; |
| 278 | let mut rng = crate::test::rng(805); |
| 279 | |
| 280 | // Test by generating a relatively large number of chars, so we also |
| 281 | // take the rejection sampling path. |
| 282 | let word: String = iter::repeat(()) |
| 283 | .map(|()| rng.random::<char>()) |
| 284 | .take(1000) |
| 285 | .collect(); |
| 286 | assert!(!word.is_empty()); |
| 287 | } |
| 288 | |
| 289 | #[test ] |
| 290 | fn test_alphanumeric() { |
| 291 | let mut rng = crate::test::rng(806); |
| 292 | |
| 293 | // Test by generating a relatively large number of chars, so we also |
| 294 | // take the rejection sampling path. |
| 295 | let mut incorrect = false; |
| 296 | for _ in 0..100 { |
| 297 | let c: char = rng.sample(Alphanumeric).into(); |
| 298 | incorrect |= !c.is_ascii_alphanumeric(); |
| 299 | } |
| 300 | assert!(!incorrect); |
| 301 | } |
| 302 | |
| 303 | #[test ] |
| 304 | fn value_stability() { |
| 305 | fn test_samples<T: Copy + core::fmt::Debug + PartialEq, D: Distribution<T>>( |
| 306 | distr: &D, |
| 307 | zero: T, |
| 308 | expected: &[T], |
| 309 | ) { |
| 310 | let mut rng = crate::test::rng(807); |
| 311 | let mut buf = [zero; 5]; |
| 312 | for x in &mut buf { |
| 313 | *x = rng.sample(distr); |
| 314 | } |
| 315 | assert_eq!(&buf, expected); |
| 316 | } |
| 317 | |
| 318 | test_samples( |
| 319 | &StandardUniform, |
| 320 | 'a' , |
| 321 | &[ |
| 322 | ' \u{8cdac}' , |
| 323 | ' \u{a346a}' , |
| 324 | ' \u{80120}' , |
| 325 | ' \u{ed692}' , |
| 326 | ' \u{35888}' , |
| 327 | ], |
| 328 | ); |
| 329 | test_samples(&Alphanumeric, 0, &[104, 109, 101, 51, 77]); |
| 330 | test_samples(&StandardUniform, false, &[true, true, false, true, false]); |
| 331 | test_samples( |
| 332 | &StandardUniform, |
| 333 | Wrapping(0i32), |
| 334 | &[ |
| 335 | Wrapping(-2074640887), |
| 336 | Wrapping(-1719949321), |
| 337 | Wrapping(2018088303), |
| 338 | Wrapping(-547181756), |
| 339 | Wrapping(838957336), |
| 340 | ], |
| 341 | ); |
| 342 | |
| 343 | // We test only sub-sets of tuple and array impls |
| 344 | test_samples(&StandardUniform, (), &[(), (), (), (), ()]); |
| 345 | test_samples( |
| 346 | &StandardUniform, |
| 347 | (false,), |
| 348 | &[(true,), (true,), (false,), (true,), (false,)], |
| 349 | ); |
| 350 | test_samples( |
| 351 | &StandardUniform, |
| 352 | (false, false), |
| 353 | &[ |
| 354 | (true, true), |
| 355 | (false, true), |
| 356 | (false, false), |
| 357 | (true, false), |
| 358 | (false, false), |
| 359 | ], |
| 360 | ); |
| 361 | |
| 362 | test_samples(&StandardUniform, [0u8; 0], &[[], [], [], [], []]); |
| 363 | test_samples( |
| 364 | &StandardUniform, |
| 365 | [0u8; 3], |
| 366 | &[ |
| 367 | [9, 247, 111], |
| 368 | [68, 24, 13], |
| 369 | [174, 19, 194], |
| 370 | [172, 69, 213], |
| 371 | [149, 207, 29], |
| 372 | ], |
| 373 | ); |
| 374 | } |
| 375 | } |
| 376 | |