| 1 | // Copyright 2018 Developers of the Rand project. | 
| 2 | // | 
|---|
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or | 
|---|
| 4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license | 
|---|
| 5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your | 
|---|
| 6 | // option. This file may not be copied, modified, or distributed | 
|---|
| 7 | // except according to those terms. | 
|---|
| 8 |  | 
|---|
| 9 | //! The Bernoulli distribution. | 
|---|
| 10 |  | 
|---|
| 11 | use crate::distributions::Distribution; | 
|---|
| 12 | use crate::Rng; | 
|---|
| 13 | use core::{fmt, u64}; | 
|---|
| 14 |  | 
|---|
| 15 | #[ cfg(feature = "serde1")] | 
|---|
| 16 | use serde::{Serialize, Deserialize}; | 
|---|
/// A distribution over `bool` with a fixed probability of yielding `true`.
///
/// Equivalent to the Binomial distribution restricted to a single trial
/// (`n = 1`).
///
/// # Example
///
/// ```rust
/// use rand::distributions::{Bernoulli, Distribution};
///
/// let d = Bernoulli::new(0.3).unwrap();
/// let v = d.sample(&mut rand::thread_rng());
/// println!("{} is from a Bernoulli distribution", v);
/// ```
///
/// # Precision
///
/// Sampling compares the stored threshold against 64 bits taken from the RNG
/// (a single `u64`). Consequently the only probabilities that can be
/// represented are multiples of 2<sup>-64</sup>.
#[derive(Debug, Clone, Copy, PartialEq)]
#[cfg_attr(feature = "serde1", derive(Serialize, Deserialize))]
pub struct Bernoulli {
    /// Success probability expressed as an integer threshold, i.e. scaled
    /// relative to the maximal `u64` value.
    p_int: u64,
}
|---|
| 42 |  | 
|---|
// Sampling from the Bernoulli distribution is done by drawing a random `u64`
// value `v` and testing `v < (p * 2^64)`.
//
// For `p == 1.0` the threshold `2^64` cannot be represented as a `u64`, so
// `u64::MAX` (2^64 - 1) is stored in its place. No value of `p < 1.0` can ever
// map to `u64::MAX`: an `f64` carries only 53 bits of precision, so the largest
// `p` below `1.0` results in `2^64 - 2048`.
//
// There is also a purely theoretical concern: for someone who wants to
// generate `true` every single time (i.e. a probability of `1.0`), comparing
// against `u64::MAX` is not enough, since on average it would still return
// `false` once every 2^64 iterations. Some people apparently care about this
// case.
//
// Therefore `u64::MAX` is special-cased to return `true` without consulting
// the RNG at all, and every *reasonable* use pays for one extra check. Luckily,
// when `new()` and `sample` are close together, the compiler can optimize the
// check away.
const ALWAYS_TRUE: u64 = u64::MAX;

// Numerically the same as `2.0.powi(64)`; it is spelled as an integer-to-float
// cast because `powi` is not available in `no_std` mode. 2^64 is a power of
// two, so the conversion to `f64` is exact.
const SCALE: f64 = (1u128 << 64) as f64;
|---|
| 67 |  | 
|---|
/// Error type returned from `Bernoulli::new` and `Bernoulli::from_ratio`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum BernoulliError {
    /// The requested probability is not in `[0, 1]`: `p < 0` or `p > 1`
    /// (or, for a ratio, `numerator > denominator` or a zero denominator).
    InvalidProbability,
}

impl fmt::Display for BernoulliError {
    /// Writes a short human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Exhaustive match on purpose: adding a variant must force a new
        // message to be written here.
        f.write_str(match self {
            BernoulliError::InvalidProbability => "p is outside [0, 1] in Bernoulli distribution",
        })
    }
}

// `std::error::Error` only exists when the standard library is available.
#[cfg(feature = "std")]
impl ::std::error::Error for BernoulliError {}
|---|
| 85 |  | 
|---|
| 86 | impl Bernoulli { | 
|---|
| 87 | /// Construct a new `Bernoulli` with the given probability of success `p`. | 
|---|
| 88 | /// | 
|---|
| 89 | /// # Precision | 
|---|
| 90 | /// | 
|---|
| 91 | /// For `p = 1.0`, the resulting distribution will always generate true. | 
|---|
| 92 | /// For `p = 0.0`, the resulting distribution will always generate false. | 
|---|
| 93 | /// | 
|---|
| 94 | /// This method is accurate for any input `p` in the range `[0, 1]` which is | 
|---|
| 95 | /// a multiple of 2<sup>-64</sup>. (Note that not all multiples of | 
|---|
| 96 | /// 2<sup>-64</sup> in `[0, 1]` can be represented as a `f64`.) | 
|---|
| 97 | #[ inline] | 
|---|
| 98 | pub fn new(p: f64) -> Result<Bernoulli, BernoulliError> { | 
|---|
| 99 | if !(0.0..1.0).contains(&p) { | 
|---|
| 100 | if p == 1.0 { | 
|---|
| 101 | return Ok(Bernoulli { p_int: ALWAYS_TRUE }); | 
|---|
| 102 | } | 
|---|
| 103 | return Err(BernoulliError::InvalidProbability); | 
|---|
| 104 | } | 
|---|
| 105 | Ok(Bernoulli { | 
|---|
| 106 | p_int: (p * SCALE) as u64, | 
|---|
| 107 | }) | 
|---|
| 108 | } | 
|---|
| 109 |  | 
|---|
| 110 | /// Construct a new `Bernoulli` with the probability of success of | 
|---|
| 111 | /// `numerator`-in-`denominator`. I.e. `new_ratio(2, 3)` will return | 
|---|
| 112 | /// a `Bernoulli` with a 2-in-3 chance, or about 67%, of returning `true`. | 
|---|
| 113 | /// | 
|---|
| 114 | /// return `true`. If `numerator == 0` it will always return `false`. | 
|---|
| 115 | /// For `numerator > denominator` and `denominator == 0`, this returns an | 
|---|
| 116 | /// error. Otherwise, for `numerator == denominator`, samples are always | 
|---|
| 117 | /// true; for `numerator == 0` samples are always false. | 
|---|
| 118 | #[ inline] | 
|---|
| 119 | pub fn from_ratio(numerator: u32, denominator: u32) -> Result<Bernoulli, BernoulliError> { | 
|---|
| 120 | if numerator > denominator || denominator == 0 { | 
|---|
| 121 | return Err(BernoulliError::InvalidProbability); | 
|---|
| 122 | } | 
|---|
| 123 | if numerator == denominator { | 
|---|
| 124 | return Ok(Bernoulli { p_int: ALWAYS_TRUE }); | 
|---|
| 125 | } | 
|---|
| 126 | let p_int = ((f64::from(numerator) / f64::from(denominator)) * SCALE) as u64; | 
|---|
| 127 | Ok(Bernoulli { p_int }) | 
|---|
| 128 | } | 
|---|
| 129 | } | 
|---|
| 130 |  | 
|---|
| 131 | impl Distribution<bool> for Bernoulli { | 
|---|
| 132 | #[ inline] | 
|---|
| 133 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> bool { | 
|---|
| 134 | // Make sure to always return true for p = 1.0. | 
|---|
| 135 | if self.p_int == ALWAYS_TRUE { | 
|---|
| 136 | return true; | 
|---|
| 137 | } | 
|---|
| 138 | let v: u64 = rng.gen(); | 
|---|
| 139 | v < self.p_int | 
|---|
| 140 | } | 
|---|
| 141 | } | 
|---|
| 142 |  | 
|---|
#[cfg(test)]
mod test {
    use super::Bernoulli;
    use crate::distributions::Distribution;
    use crate::Rng;

    /// A serde round trip through bincode must preserve the stored threshold.
    #[test]
    #[cfg(feature = "serde1")]
    fn test_serializing_deserializing_bernoulli() {
        let coin_flip = Bernoulli::new(0.5).unwrap();
        let encoded = bincode::serialize(&coin_flip).unwrap();
        let de_coin_flip: Bernoulli = bincode::deserialize(&encoded).unwrap();

        assert_eq!(coin_flip.p_int, de_coin_flip.p_int);
    }

    /// `p = 0.0` never succeeds and `p = 1.0` always succeeds, whichever of
    /// the two sampling entry points is used.
    #[test]
    // We prefer to be explicit here.
    #[allow(clippy::bool_assert_comparison)]
    fn test_trivial() {
        let mut rng = crate::test::rng(1);
        let never = Bernoulli::new(0.0).unwrap();
        let always = Bernoulli::new(1.0).unwrap();
        for _ in 0..5 {
            assert_eq!(rng.sample::<bool, _>(&never), false);
            assert_eq!(rng.sample::<bool, _>(&always), true);
            assert_eq!(Distribution::<bool>::sample(&never, &mut rng), false);
            assert_eq!(Distribution::<bool>::sample(&always, &mut rng), true);
        }
    }

    /// The empirical success rate must be close to the requested probability
    /// for both constructors.
    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_average() {
        const P: f64 = 0.3;
        const NUM: u32 = 3;
        const DENOM: u32 = 10;
        const N: u32 = 100_000;

        let from_p = Bernoulli::new(P).unwrap();
        let from_ratio = Bernoulli::from_ratio(NUM, DENOM).unwrap();

        let mut rng = crate::test::rng(2);
        let (mut hits_p, mut hits_ratio) = (0u32, 0u32);
        for _ in 0..N {
            // Both distributions take turns on the same RNG stream.
            hits_p += u32::from(from_p.sample(&mut rng));
            hits_ratio += u32::from(from_ratio.sample(&mut rng));
        }

        let mean_p = f64::from(hits_p) / f64::from(N);
        assert!((mean_p - P).abs() < 5e-3);

        let mean_ratio = f64::from(hits_ratio) / f64::from(N);
        let expected_ratio = f64::from(NUM) / f64::from(DENOM);
        assert!((mean_ratio - expected_ratio).abs() < 5e-3);
    }

    /// Pins the exact output sequence for a fixed seed.
    #[test]
    fn value_stability() {
        let mut rng = crate::test::rng(3);
        let distr = Bernoulli::new(0.4532).unwrap();

        let mut observed = [false; 10];
        observed
            .iter_mut()
            .for_each(|slot| *slot = rng.sample(&distr));

        let expected = [
            true, false, false, true, false, false, true, true, true, true,
        ];
        assert_eq!(observed, expected);
    }

    /// Two distributions built from the same probability compare equal.
    #[test]
    fn bernoulli_distributions_can_be_compared() {
        let lhs = Bernoulli::new(1.0);
        let rhs = Bernoulli::new(1.0);
        assert_eq!(lhs, rhs);
    }
}
|---|
| 220 |  | 
|---|