1 | use std::mem; |
2 | |
3 | use crate::stats::float::Float; |
4 | use crate::stats::rand_util::{new_rng, Rng}; |
5 | use crate::stats::univariate::Sample; |
6 | |
7 | pub struct Resamples<'a, A> |
8 | where |
9 | A: Float, |
10 | { |
11 | rng: Rng, |
12 | sample: &'a [A], |
13 | stage: Option<Vec<A>>, |
14 | } |
15 | |
16 | #[cfg_attr (feature = "cargo-clippy" , allow(clippy::should_implement_trait))] |
17 | impl<'a, A> Resamples<'a, A> |
18 | where |
19 | A: 'a + Float, |
20 | { |
21 | pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> { |
22 | let slice = sample; |
23 | |
24 | Resamples { |
25 | rng: new_rng(), |
26 | sample: slice, |
27 | stage: None, |
28 | } |
29 | } |
30 | |
31 | pub fn next(&mut self) -> &Sample<A> { |
32 | let n = self.sample.len(); |
33 | let rng = &mut self.rng; |
34 | |
35 | match self.stage { |
36 | None => { |
37 | let mut stage = Vec::with_capacity(n); |
38 | |
39 | for _ in 0..n { |
40 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |
41 | stage.push(self.sample[idx as usize]) |
42 | } |
43 | |
44 | self.stage = Some(stage); |
45 | } |
46 | Some(ref mut stage) => { |
47 | for elem in stage.iter_mut() { |
48 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |
49 | *elem = self.sample[idx as usize] |
50 | } |
51 | } |
52 | } |
53 | |
54 | if let Some(ref v) = self.stage { |
55 | unsafe { mem::transmute::<&[_], _>(v) } |
56 | } else { |
57 | unreachable!(); |
58 | } |
59 | } |
60 | } |
61 | |
#[cfg(test)]
mod test {
    use quickcheck::quickcheck;
    use quickcheck::TestResult;
    use std::collections::HashSet;

    use crate::stats::univariate::resamples::Resamples;
    use crate::stats::univariate::Sample;

    // Property: every value found in a resample also occurs in the sample it
    // was drawn from.
    quickcheck! {
        fn subset(size: u8, nresamples: u8) -> TestResult {
            let size = size as usize;
            let nresamples = nresamples as usize;

            // Only samples with at least two elements are exercised.
            if size <= 1 {
                return TestResult::discard();
            }

            let values: Vec<_> = (0..size).map(|i| i as f32).collect();
            let mut resamples = Resamples::new(Sample::new(&values));
            let allowed: HashSet<i64> = values.iter().map(|&x| x as i64).collect();

            // Stops at the first resample that contains a foreign value.
            let every_resample_is_subset = (0..nresamples).all(|_| {
                resamples
                    .next()
                    .iter()
                    .all(|&x| allowed.contains(&(x as i64)))
            });

            TestResult::from_bool(every_resample_is_subset)
        }
    }

    // Consecutive resamples of a large sample should essentially never be
    // identical.
    #[test]
    fn different_subsets() {
        let size = 1000;
        let values: Vec<_> = (0..size).map(|i| i as f32).collect();
        let mut resamples = Resamples::new(Sample::new(&values));

        // Hypothetically, one identical pair might show up by chance, but more
        // than one is likely to be a bug.
        let num_duplicated = (0..1000)
            .filter(|_| {
                let first = resamples.next().iter().cloned().collect::<Vec<_>>();
                let second = resamples.next().iter().cloned().collect::<Vec<_>>();

                first == second
            })
            .count();

        assert!(
            num_duplicated <= 1,
            "Found {} duplicate samples",
            num_duplicated
        );
    }
}
120 | |