1 | use std::mem; |
---|---|

2 | |

3 | use crate::stats::float::Float; |

4 | use crate::stats::rand_util::{new_rng, Rng}; |

5 | use crate::stats::univariate::Sample; |

6 | |

7 | pub struct Resamples<'a, A> |

8 | where |

9 | A: Float, |

10 | { |

11 | rng: Rng, |

12 | sample: &'a [A], |

13 | stage: Option<Vec<A>>, |

14 | } |

15 | |

16 | #[cfg_attr(feature = "cargo-clippy", allow(clippy::should_implement_trait))] |

17 | impl<'a, A> Resamples<'a, A> |

18 | where |

19 | A: 'a + Float, |

20 | { |

21 | pub fn new(sample: &'a Sample<A>) -> Resamples<'a, A> { |

22 | let slice = sample; |

23 | |

24 | Resamples { |

25 | rng: new_rng(), |

26 | sample: slice, |

27 | stage: None, |

28 | } |

29 | } |

30 | |

31 | pub fn next(&mut self) -> &Sample<A> { |

32 | let n = self.sample.len(); |

33 | let rng = &mut self.rng; |

34 | |

35 | match self.stage { |

36 | None => { |

37 | let mut stage = Vec::with_capacity(n); |

38 | |

39 | for _ in 0..n { |

40 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |

41 | stage.push(self.sample[idx as usize]) |

42 | } |

43 | |

44 | self.stage = Some(stage); |

45 | } |

46 | Some(ref mut stage) => { |

47 | for elem in stage.iter_mut() { |

48 | let idx = rng.rand_range(0u64..(self.sample.len() as u64)); |

49 | *elem = self.sample[idx as usize] |

50 | } |

51 | } |

52 | } |

53 | |

54 | if let Some(ref v) = self.stage { |

55 | unsafe { mem::transmute::<&[_], _>(v) } |

56 | } else { |

57 | unreachable!(); |

58 | } |

59 | } |

60 | } |

61 | |

#[cfg(test)]
mod test {
    use quickcheck::quickcheck;
    use quickcheck::TestResult;
    use std::collections::HashSet;

    use crate::stats::univariate::resamples::Resamples;
    use crate::stats::univariate::Sample;

    // Property: every value that appears in a resample also appears in the
    // sample it was drawn from.
    quickcheck! {
        fn subset(size: u8, nresamples: u8) -> TestResult {
            let size = size as usize;
            let nresamples = nresamples as usize;

            // Inputs with fewer than two elements are not exercised.
            if size <= 1 {
                return TestResult::discard();
            }

            let v: Vec<_> = (0..size).map(|i| i as f32).collect();
            let mut resamples = Resamples::new(Sample::new(&v));

            // The values are whole numbers, so compare them as integers
            // (`f32` cannot be stored in a `HashSet`).
            let universe: HashSet<i64> = v.iter().map(|&x| x as i64).collect();

            for _ in 0..nresamples {
                let drawn: HashSet<i64> =
                    resamples.next().iter().map(|&x| x as i64).collect();

                // Stop at the first violation.
                if !drawn.is_subset(&universe) {
                    return TestResult::from_bool(false);
                }
            }

            TestResult::from_bool(true)
        }
    }

    // Consecutive resamples of a large sample should essentially never be
    // identical.
    #[test]
    fn different_subsets() {
        let size = 1000;
        let v: Vec<_> = (0..size).map(|i| i as f32).collect();
        let sample = Sample::new(&v);
        let mut resamples = Resamples::new(sample);

        // Hypothetically, we might see one duplicate, but more than one is likely to be a bug.
        let num_duplicated = (0..1000)
            .filter(|_| {
                // Copy each resample out before drawing the next one, because
                // the returned reference is tied to the mutable borrow.
                let first: Vec<_> = resamples.next().iter().cloned().collect();
                let second: Vec<_> = resamples.next().iter().cloned().collect();
                first == second
            })
            .count();

        assert!(
            num_duplicated <= 1,
            "Found {} duplicate samples",
            num_duplicated
        );
    }
}

120 |