| 1 | // Copyright 2018 Developers of the Rand project. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
| 4 | // https://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
| 5 | // <LICENSE-MIT or https://opensource.org/licenses/MIT>, at your |
| 6 | // option. This file may not be copied, modified, or distributed |
| 7 | // except according to those terms. |
| 8 | |
| 9 | //! Weighted index sampling |
| 10 | |
| 11 | use crate::distributions::uniform::{SampleBorrow, SampleUniform, UniformSampler}; |
| 12 | use crate::distributions::Distribution; |
| 13 | use crate::Rng; |
| 14 | use core::cmp::PartialOrd; |
| 15 | use core::fmt; |
| 16 | |
| 17 | // Note that this whole module is only imported if feature="alloc" is enabled. |
| 18 | use alloc::vec::Vec; |
| 19 | |
| 20 | #[cfg (feature = "serde1" )] |
| 21 | use serde::{Serialize, Deserialize}; |
| 22 | |
/// A distribution using weighted sampling of discrete items
///
/// Sampling a `WeightedIndex` distribution returns the index of a randomly
/// selected element from the iterator used when the `WeightedIndex` was
/// created. The chance of a given element being picked is proportional to the
/// value of the element. The weights can use any type `X` for which an
/// implementation of [`Uniform<X>`] exists.
///
/// # Performance
///
/// Time complexity of sampling from `WeightedIndex` is `O(log N)` where
/// `N` is the number of weights. As an alternative,
/// [`rand_distr::weighted_alias`](https://docs.rs/rand_distr/*/rand_distr/weighted_alias/index.html)
/// supports `O(1)` sampling, but with much higher initialisation cost.
///
/// A `WeightedIndex<X>` contains a `Vec<X>` and a [`Uniform<X>`] and so its
/// size is the sum of the size of those objects, possibly plus some alignment.
///
/// Creating a `WeightedIndex<X>` will allocate enough space to hold `N - 1`
/// weights of type `X`, where `N` is the number of weights. However, since
/// `Vec` doesn't guarantee a particular growth strategy, additional memory
/// might be allocated but not used. Since the `WeightedIndex` object also
/// contains a [`Uniform<X>`], this might cause additional allocations, though
/// for primitive types, [`Uniform<X>`] doesn't allocate any memory.
///
/// Sampling from `WeightedIndex` will result in a single call to
/// `Uniform<X>::sample` (method of the [`Distribution`] trait), which typically
/// will request a single value from the underlying [`RngCore`], though the
/// exact number depends on the implementation of `Uniform<X>::sample`.
///
/// # Example
///
/// ```
/// use rand::prelude::*;
/// use rand::distributions::WeightedIndex;
///
/// let choices = ['a', 'b', 'c'];
/// let weights = [2, 1, 1];
/// let dist = WeightedIndex::new(&weights).unwrap();
/// let mut rng = thread_rng();
/// for _ in 0..100 {
///     // 50% chance to print 'a', 25% chance to print 'b', 25% chance to print 'c'
///     println!("{}", choices[dist.sample(&mut rng)]);
/// }
///
/// let items = [('a', 0), ('b', 3), ('c', 7)];
/// let dist2 = WeightedIndex::new(items.iter().map(|item| item.1)).unwrap();
/// for _ in 0..100 {
///     // 0% chance to print 'a', 30% chance to print 'b', 70% chance to print 'c'
///     println!("{}", items[dist2.sample(&mut rng)].0);
/// }
/// ```
///
/// [`Uniform<X>`]: crate::distributions::Uniform
/// [`RngCore`]: crate::RngCore
#[derive (Debug, Clone, PartialEq)]
#[cfg_attr (feature = "serde1" , derive(Serialize, Deserialize))]
#[cfg_attr (doc_cfg, doc(cfg(feature = "alloc" )))]
pub struct WeightedIndex<X: SampleUniform + PartialOrd> {
    // Running sums of the weights: entry `i` is the sum of weights `0..=i`.
    // The final running sum is not stored here; it lives in `total_weight`,
    // so this vector holds one entry fewer than there are weights.
    cumulative_weights: Vec<X>,
    // Sum of all weights.
    total_weight: X,
    // Sampler constructed from `X::default()` and `total_weight`; `sample`
    // draws from it and looks the drawn value up in `cumulative_weights`.
    weight_distribution: X::Sampler,
}
| 86 | |
| 87 | impl<X: SampleUniform + PartialOrd> WeightedIndex<X> { |
| 88 | /// Creates a new a `WeightedIndex` [`Distribution`] using the values |
| 89 | /// in `weights`. The weights can use any type `X` for which an |
| 90 | /// implementation of [`Uniform<X>`] exists. |
| 91 | /// |
| 92 | /// Returns an error if the iterator is empty, if any weight is `< 0`, or |
| 93 | /// if its total value is 0. |
| 94 | /// |
| 95 | /// [`Uniform<X>`]: crate::distributions::uniform::Uniform |
| 96 | pub fn new<I>(weights: I) -> Result<WeightedIndex<X>, WeightedError> |
| 97 | where |
| 98 | I: IntoIterator, |
| 99 | I::Item: SampleBorrow<X>, |
| 100 | X: for<'a> ::core::ops::AddAssign<&'a X> + Clone + Default, |
| 101 | { |
| 102 | let mut iter = weights.into_iter(); |
| 103 | let mut total_weight: X = iter.next().ok_or(WeightedError::NoItem)?.borrow().clone(); |
| 104 | |
| 105 | let zero = <X as Default>::default(); |
| 106 | if !(total_weight >= zero) { |
| 107 | return Err(WeightedError::InvalidWeight); |
| 108 | } |
| 109 | |
| 110 | let mut weights = Vec::<X>::with_capacity(iter.size_hint().0); |
| 111 | for w in iter { |
| 112 | // Note that `!(w >= x)` is not equivalent to `w < x` for partially |
| 113 | // ordered types due to NaNs which are equal to nothing. |
| 114 | if !(w.borrow() >= &zero) { |
| 115 | return Err(WeightedError::InvalidWeight); |
| 116 | } |
| 117 | weights.push(total_weight.clone()); |
| 118 | total_weight += w.borrow(); |
| 119 | } |
| 120 | |
| 121 | if total_weight == zero { |
| 122 | return Err(WeightedError::AllWeightsZero); |
| 123 | } |
| 124 | let distr = X::Sampler::new(zero, total_weight.clone()); |
| 125 | |
| 126 | Ok(WeightedIndex { |
| 127 | cumulative_weights: weights, |
| 128 | total_weight, |
| 129 | weight_distribution: distr, |
| 130 | }) |
| 131 | } |
| 132 | |
| 133 | /// Update a subset of weights, without changing the number of weights. |
| 134 | /// |
| 135 | /// `new_weights` must be sorted by the index. |
| 136 | /// |
| 137 | /// Using this method instead of `new` might be more efficient if only a small number of |
| 138 | /// weights is modified. No allocations are performed, unless the weight type `X` uses |
| 139 | /// allocation internally. |
| 140 | /// |
| 141 | /// In case of error, `self` is not modified. |
| 142 | pub fn update_weights(&mut self, new_weights: &[(usize, &X)]) -> Result<(), WeightedError> |
| 143 | where X: for<'a> ::core::ops::AddAssign<&'a X> |
| 144 | + for<'a> ::core::ops::SubAssign<&'a X> |
| 145 | + Clone |
| 146 | + Default { |
| 147 | if new_weights.is_empty() { |
| 148 | return Ok(()); |
| 149 | } |
| 150 | |
| 151 | let zero = <X as Default>::default(); |
| 152 | |
| 153 | let mut total_weight = self.total_weight.clone(); |
| 154 | |
| 155 | // Check for errors first, so we don't modify `self` in case something |
| 156 | // goes wrong. |
| 157 | let mut prev_i = None; |
| 158 | for &(i, w) in new_weights { |
| 159 | if let Some(old_i) = prev_i { |
| 160 | if old_i >= i { |
| 161 | return Err(WeightedError::InvalidWeight); |
| 162 | } |
| 163 | } |
| 164 | if !(*w >= zero) { |
| 165 | return Err(WeightedError::InvalidWeight); |
| 166 | } |
| 167 | if i > self.cumulative_weights.len() { |
| 168 | return Err(WeightedError::TooMany); |
| 169 | } |
| 170 | |
| 171 | let mut old_w = if i < self.cumulative_weights.len() { |
| 172 | self.cumulative_weights[i].clone() |
| 173 | } else { |
| 174 | self.total_weight.clone() |
| 175 | }; |
| 176 | if i > 0 { |
| 177 | old_w -= &self.cumulative_weights[i - 1]; |
| 178 | } |
| 179 | |
| 180 | total_weight -= &old_w; |
| 181 | total_weight += w; |
| 182 | prev_i = Some(i); |
| 183 | } |
| 184 | if total_weight <= zero { |
| 185 | return Err(WeightedError::AllWeightsZero); |
| 186 | } |
| 187 | |
| 188 | // Update the weights. Because we checked all the preconditions in the |
| 189 | // previous loop, this should never panic. |
| 190 | let mut iter = new_weights.iter(); |
| 191 | |
| 192 | let mut prev_weight = zero.clone(); |
| 193 | let mut next_new_weight = iter.next(); |
| 194 | let &(first_new_index, _) = next_new_weight.unwrap(); |
| 195 | let mut cumulative_weight = if first_new_index > 0 { |
| 196 | self.cumulative_weights[first_new_index - 1].clone() |
| 197 | } else { |
| 198 | zero.clone() |
| 199 | }; |
| 200 | for i in first_new_index..self.cumulative_weights.len() { |
| 201 | match next_new_weight { |
| 202 | Some(&(j, w)) if i == j => { |
| 203 | cumulative_weight += w; |
| 204 | next_new_weight = iter.next(); |
| 205 | } |
| 206 | _ => { |
| 207 | let mut tmp = self.cumulative_weights[i].clone(); |
| 208 | tmp -= &prev_weight; // We know this is positive. |
| 209 | cumulative_weight += &tmp; |
| 210 | } |
| 211 | } |
| 212 | prev_weight = cumulative_weight.clone(); |
| 213 | core::mem::swap(&mut prev_weight, &mut self.cumulative_weights[i]); |
| 214 | } |
| 215 | |
| 216 | self.total_weight = total_weight; |
| 217 | self.weight_distribution = X::Sampler::new(zero, self.total_weight.clone()); |
| 218 | |
| 219 | Ok(()) |
| 220 | } |
| 221 | } |
| 222 | |
| 223 | impl<X> Distribution<usize> for WeightedIndex<X> |
| 224 | where X: SampleUniform + PartialOrd |
| 225 | { |
| 226 | fn sample<R: Rng + ?Sized>(&self, rng: &mut R) -> usize { |
| 227 | use ::core::cmp::Ordering; |
| 228 | let chosen_weight: X = self.weight_distribution.sample(rng); |
| 229 | // Find the first item which has a weight *higher* than the chosen weight. |
| 230 | self.cumulative_weights |
| 231 | .binary_search_by(|w: &X| { |
| 232 | if *w <= chosen_weight { |
| 233 | Ordering::Less |
| 234 | } else { |
| 235 | Ordering::Greater |
| 236 | } |
| 237 | }) |
| 238 | .unwrap_err() |
| 239 | } |
| 240 | } |
| 241 | |
#[cfg(test)]
mod test {
    use super::*;

    /// A distribution serialized and deserialized with bincode keeps its
    /// cumulative weights and total weight.
    #[cfg(feature = "serde1")]
    #[test]
    fn test_weightedindex_serde1() {
        let original = WeightedIndex::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]).unwrap();

        let bytes = bincode::serialize(&original).unwrap();
        let restored: WeightedIndex<i32> = bincode::deserialize(&bytes).unwrap();

        assert_eq!(restored.cumulative_weights, original.cumulative_weights);
        assert_eq!(restored.total_weight, original.total_weight);
    }

    /// NaN weights are rejected by both `new` and `update_weights`.
    #[test]
    fn test_accepting_nan() {
        let nan = core::f32::NAN;

        let rejected: [&[f32]; 3] = [&[nan, 0.5], &[nan], &[0.5, nan]];
        for weights in rejected.iter() {
            assert_eq!(
                WeightedIndex::new(*weights).unwrap_err(),
                WeightedError::InvalidWeight,
            );
        }

        let mut distr = WeightedIndex::new(&[0.5, 7.0]).unwrap();
        assert_eq!(
            distr.update_weights(&[(0, &nan)]).unwrap_err(),
            WeightedError::InvalidWeight,
        );
    }

    /// Sampled frequencies track the weights, certain outcomes are always
    /// picked, and invalid inputs produce the matching error.
    #[test]
    #[cfg_attr(miri, ignore)] // Miri is too slow
    fn test_weightedindex() {
        let mut r = crate::test::rng(700);
        const N_REPS: u32 = 5000;
        let weights = [1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7];
        let total_weight = weights.iter().sum::<u32>() as f32;

        // Every observed count must be within 25% (relative) of its
        // expectation; an exact match is accepted without dividing.
        let verify = |counts: [i32; 14]| {
            for (&weight, &count) in weights.iter().zip(counts.iter()) {
                let expected = (weight * N_REPS) as f32 / total_weight;
                let deviation = (count as f32 - expected).abs();
                let relative = if deviation != 0.0 {
                    deviation / expected
                } else {
                    deviation
                };
                assert!(relative <= 0.25);
            }
        };

        // Samples `N_REPS` times and counts how often each index came up.
        fn tally<R: Rng>(distr: &WeightedIndex<u32>, rng: &mut R) -> [i32; 14] {
            let mut counts = [0i32; 14];
            for _ in 0..N_REPS {
                counts[distr.sample(rng)] += 1;
            }
            counts
        }

        // WeightedIndex from vec
        let from_vec = WeightedIndex::new(weights.to_vec()).unwrap();
        verify(tally(&from_vec, &mut r));

        // WeightedIndex from slice
        let from_slice = WeightedIndex::new(&weights[..]).unwrap();
        verify(tally(&from_slice, &mut r));

        // WeightedIndex from iterator
        let from_iter = WeightedIndex::new(weights.iter()).unwrap();
        verify(tally(&from_iter, &mut r));

        // With a single non-zero weight the outcome is certain.
        let certain: [(&[i32], usize); 3] = [
            (&[0, 1], 1),
            (&[1, 0], 0),
            (&[0, 0, 0, 0, 10, 0], 4),
        ];
        for _ in 0..5 {
            for &(input, expected) in certain.iter() {
                assert_eq!(WeightedIndex::new(input).unwrap().sample(&mut r), expected);
            }
        }

        // Inputs that must be refused, with the error each one produces.
        let refused: [(&[i32], WeightedError); 5] = [
            (&[10][0..0], WeightedError::NoItem),
            (&[0], WeightedError::AllWeightsZero),
            (&[10, 20, -1, 30], WeightedError::InvalidWeight),
            (&[-10, 20, 1, 30], WeightedError::InvalidWeight),
            (&[-10], WeightedError::InvalidWeight),
        ];
        for &(input, expected) in refused.iter() {
            assert_eq!(WeightedIndex::new(input).unwrap_err(), expected);
        }
    }

    /// Updating weights in place gives the same state as building a fresh
    /// distribution from the updated weights.
    #[test]
    fn test_update_weights() {
        // (initial weights, updates, weights after the updates)
        let data = [
            (
                &[10u32, 2, 3, 4][..],
                &[(1, &100), (2, &4)][..], // positive change
                &[10, 100, 4, 4][..],
            ),
            (
                &[1u32, 2, 3, 0, 5, 6, 7, 1, 2, 3, 4, 5, 6, 7][..],
                &[(2, &1), (5, &1), (13, &100)][..], // negative change and last element
                &[1u32, 2, 1, 0, 5, 1, 7, 1, 2, 3, 4, 5, 6, 100][..],
            ),
        ];

        for &(initial, update, updated) in data.iter() {
            let mut distr = WeightedIndex::new(initial.to_vec()).unwrap();
            assert_eq!(distr.total_weight, initial.iter().sum::<u32>());

            distr.update_weights(update).unwrap();

            let rebuilt = WeightedIndex::new(updated.to_vec()).unwrap();
            assert_eq!(distr.total_weight, updated.iter().sum::<u32>());
            assert_eq!(distr.total_weight, rebuilt.total_weight);
            assert_eq!(distr.cumulative_weights, rebuilt.cumulative_weights);
        }
    }

    /// Pins the exact indices produced for a fixed seed.
    #[test]
    fn value_stability() {
        // Fills `buf` with samples from a fresh seed-701 generator and
        // compares the result with `expected`.
        fn test_samples<X: SampleUniform + PartialOrd, I>(
            weights: I, buf: &mut [usize], expected: &[usize],
        ) where
            I: IntoIterator,
            I::Item: SampleBorrow<X>,
            X: for<'a> ::core::ops::AddAssign<&'a X> + Clone + Default,
        {
            assert_eq!(buf.len(), expected.len());
            let distr = WeightedIndex::new(weights).unwrap();
            let mut rng = crate::test::rng(701);
            for slot in buf.iter_mut() {
                *slot = distr.sample(&mut rng);
            }
            assert_eq!(buf, expected);
        }

        let mut buf = [0; 10];
        test_samples(&[1i32, 1, 1, 1, 1, 1, 1, 1, 1], &mut buf, &[
            0, 6, 2, 6, 3, 4, 7, 8, 2, 5,
        ]);
        test_samples(&[0.7f32, 0.1, 0.1, 0.1], &mut buf, &[
            0, 0, 0, 1, 0, 0, 2, 3, 0, 0,
        ]);
        test_samples(&[1.0f64, 0.999, 0.998, 0.997], &mut buf, &[
            2, 2, 1, 3, 2, 1, 3, 3, 2, 1,
        ]);
    }

    /// Two distributions built from equal weights compare equal.
    #[test]
    fn weighted_index_distributions_can_be_compared() {
        let left = WeightedIndex::new(&[1, 2]);
        let right = WeightedIndex::new(&[1, 2]);
        assert_eq!(left, right);
    }
}
| 427 | |
/// Error type returned from `WeightedIndex::new` and
/// `WeightedIndex::update_weights`.
#[cfg_attr (doc_cfg, doc(cfg(feature = "alloc" )))]
#[derive (Debug, Clone, Copy, PartialEq, Eq)]
pub enum WeightedError {
    /// The provided weight collection contains no items.
    NoItem,

    /// A weight is either less than zero, greater than the supported maximum,
    /// NaN, or otherwise invalid.
    ///
    /// `WeightedIndex::update_weights` also returns this when the indices in
    /// `new_weights` are not strictly increasing.
    InvalidWeight,

    /// All items in the provided weight collection are zero.
    AllWeightsZero,

    /// Too many weights are provided (length greater than `u32::MAX`)
    ///
    /// `WeightedIndex::update_weights` also returns this when an index in
    /// `new_weights` lies beyond the last weight.
    TooMany,
}
| 445 | |
// Marks `WeightedError` as a standard error type. The impl is empty (it adds
// no methods of its own) and is compiled only when the "std" feature is
// enabled.
#[cfg (feature = "std" )]
impl std::error::Error for WeightedError {}
| 448 | |
| 449 | impl fmt::Display for WeightedError { |
| 450 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
| 451 | f.write_str(data:match *self { |
| 452 | WeightedError::NoItem => "No weights provided in distribution" , |
| 453 | WeightedError::InvalidWeight => "A weight is invalid in distribution" , |
| 454 | WeightedError::AllWeightsZero => "All weights are zero in distribution" , |
| 455 | WeightedError::TooMany => "Too many weights (hit u32::MAX) in distribution" , |
| 456 | }) |
| 457 | } |
| 458 | } |
| 459 | |