| 1 | use std::mem; |
| 2 | |
| 3 | use crate::stats::float::Float; |
| 4 | use crate::stats::rand_util::{new_rng, Rng}; |
| 5 | use crate::stats::univariate::Sample; |
| 6 | |
| 7 | pub struct Resamples<'a, A> |
| 8 | where |
| 9 | A: Float, |
| 10 | { |
| 11 | rng: Rng, |
| 12 | sample: &'a [A], |
| 13 | stage: Option<Vec<A>>, |
| 14 | } |
| 15 | |
| 16 | #[cfg_attr (feature = "cargo-clippy" , allow(clippy::should_implement_trait))] |
| 17 | impl<'a, A> Resamples<'a, A> |
| 18 | where |
| 19 | A: 'a + Float, |
| 20 | { |
| 21 | pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> { |
| 22 | let slice = sample; |
| 23 | |
| 24 | Resamples { |
| 25 | rng: new_rng(), |
| 26 | sample: slice, |
| 27 | stage: None, |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | pub fn next(&mut self) -> &Sample<A> { |
| 32 | let n = self.sample.len(); |
| 33 | let rng = &mut self.rng; |
| 34 | |
| 35 | match self.stage { |
| 36 | None => { |
| 37 | let mut stage = Vec::with_capacity(n); |
| 38 | |
| 39 | for _ in 0..n { |
| 40 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |
| 41 | stage.push(self.sample[idx as usize]) |
| 42 | } |
| 43 | |
| 44 | self.stage = Some(stage); |
| 45 | } |
| 46 | Some(ref mut stage) => { |
| 47 | for elem in stage.iter_mut() { |
| 48 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |
| 49 | *elem = self.sample[idx as usize] |
| 50 | } |
| 51 | } |
| 52 | } |
| 53 | |
| 54 | if let Some(ref v) = self.stage { |
| 55 | unsafe { mem::transmute::<&[_], _>(v) } |
| 56 | } else { |
| 57 | unreachable!(); |
| 58 | } |
| 59 | } |
| 60 | } |
| 61 | |
#[cfg(test)]
mod test {
    use quickcheck::quickcheck;
    use quickcheck::TestResult;
    use std::collections::HashSet;

    use crate::stats::univariate::resamples::Resamples;
    use crate::stats::univariate::Sample;

    // Property: every value appearing in a resample also appears in the original sample.
    quickcheck! {
        fn subset(size: u8, nresamples: u8) -> TestResult {
            let size = size as usize;
            let nresamples = nresamples as usize;

            // Inputs with fewer than two points are discarded rather than tested.
            if size <= 1 {
                return TestResult::discard();
            }

            let v: Vec<_> = (0..size).map(|i| i as f32).collect();
            let sample = Sample::new(&v);
            let mut resamples = Resamples::new(sample);

            // The values are whole numbers, so they are compared as `i64` to make them hashable.
            let population: HashSet<i64> = v.iter().map(|&x| x as i64).collect();

            // Stop at the first resample that contains a value outside the population.
            let mut all_contained = true;
            for _ in 0..nresamples {
                let drawn: HashSet<i64> = resamples.next().iter().map(|&x| x as i64).collect();

                if !drawn.is_subset(&population) {
                    all_contained = false;
                    break;
                }
            }

            TestResult::from_bool(all_contained)
        }
    }

    // Consecutive resamples of a large sample should almost never be identical.
    #[test]
    fn different_subsets() {
        let size = 1000;
        let v: Vec<_> = (0..size).map(|i| i as f32).collect();
        let sample = Sample::new(&v);
        let mut resamples = Resamples::new(sample);

        // Count how many of the 1000 back-to-back pairs of draws come out equal.
        let num_duplicated = (0..1000)
            .filter(|_| {
                let first: Vec<_> = resamples.next().iter().cloned().collect();
                let second: Vec<_> = resamples.next().iter().cloned().collect();

                first == second
            })
            .count();

        // Hypothetically, we might see one duplicate, but more than one is likely to be a bug.
        assert!(
            num_duplicated <= 1,
            "Found {} duplicate samples",
            num_duplicated
        );
    }
}
| 120 | |