1 | //! Kernel density estimation |
---|---|

2 | |

3 | pub mod kernel; |

4 | |

5 | use self::kernel::Kernel; |

6 | use crate::stats::float::Float; |

7 | use crate::stats::univariate::Sample; |

8 | #[cfg(feature = "rayon")] |

9 | use rayon::prelude::*; |

10 | |

11 | /// Univariate kernel density estimator |

12 | pub struct Kde<'a, A, K> |

13 | where |

14 | A: Float, |

15 | K: Kernel<A>, |

16 | { |

17 | bandwidth: A, |

18 | kernel: K, |

19 | sample: &'a Sample<A>, |

20 | } |

21 | |

22 | impl<'a, A, K> Kde<'a, A, K> |

23 | where |

24 | A: 'a + Float, |

25 | K: Kernel<A>, |

26 | { |

27 | /// Creates a new kernel density estimator from the `sample`, using a kernel and estimating |

28 | /// the bandwidth using the method `bw` |

29 | pub fn new(sample: &'a Sample<A>, kernel: K, bw: Bandwidth) -> Kde<'a, A, K> { |

30 | Kde { |

31 | bandwidth: bw.estimate(sample), |

32 | kernel, |

33 | sample, |

34 | } |

35 | } |

36 | |

37 | /// Returns the bandwidth used by the estimator |

38 | pub fn bandwidth(&self) -> A { |

39 | self.bandwidth |

40 | } |

41 | |

42 | /// Maps the KDE over `xs` |

43 | /// |

44 | /// - Multihreaded |

45 | pub fn map(&self, xs: &[A]) -> Box<[A]> { |

46 | #[cfg(feature = "rayon")] |

47 | let iter = xs.par_iter(); |

48 | |

49 | #[cfg(not(feature = "rayon"))] |

50 | let iter = xs.iter(); |

51 | |

52 | iter.map(|&x| self.estimate(x)) |

53 | .collect::<Vec<_>>() |

54 | .into_boxed_slice() |

55 | } |

56 | |

57 | /// Estimates the probability density of `x` |

58 | pub fn estimate(&self, x: A) -> A { |

59 | let _0 = A::cast(0); |

60 | let slice = self.sample; |

61 | let h = self.bandwidth; |

62 | let n = A::cast(slice.len()); |

63 | |

64 | let sum = slice |

65 | .iter() |

66 | .fold(_0, |acc, &x_i| acc + self.kernel.evaluate((x - x_i) / h)); |

67 | |

68 | sum / (h * n) |

69 | } |

70 | } |

71 | |

72 | /// Method to estimate the bandwidth |

73 | pub enum Bandwidth { |

74 | /// Use Silverman's rule of thumb to estimate the bandwidth from the sample |

75 | Silverman, |

76 | } |

77 | |

78 | impl Bandwidth { |

79 | fn estimate<A: Float>(self, sample: &Sample<A>) -> A { |

80 | match self { |

81 | Bandwidth::Silverman => { |

82 | let factor = A::cast(4. / 3.); |

83 | let exponent = A::cast(1. / 5.); |

84 | let n = A::cast(sample.len()); |

85 | let sigma = sample.std_dev(None); |

86 | |

87 | sigma * (factor / n).powf(exponent) |

88 | } |

89 | } |

90 | } |

91 | } |

92 | |

93 | #[cfg(test)] |

94 | macro_rules! test { |

95 | ($ty:ident) => { |

96 | mod $ty { |

97 | use approx::relative_eq; |

98 | use quickcheck::quickcheck; |

99 | use quickcheck::TestResult; |

100 | |

101 | use crate::stats::univariate::kde::kernel::Gaussian; |

102 | use crate::stats::univariate::kde::{Bandwidth, Kde}; |

103 | use crate::stats::univariate::Sample; |

104 | |

105 | // The [-inf inf] integral of the estimated PDF should be one |

106 | quickcheck! { |

107 | fn integral(size: u8, start: u8) -> TestResult { |

108 | let size = size as usize; |

109 | let start = start as usize; |

110 | const DX: $ty = 1e-3; |

111 | |

112 | if let Some(v) = crate::stats::test::vec::<$ty>(size, start) { |

113 | let slice = &v[start..]; |

114 | let data = Sample::new(slice); |

115 | let kde = Kde::new(data, Gaussian, Bandwidth::Silverman); |

116 | let h = kde.bandwidth(); |

117 | // NB Obviously a [-inf inf] integral is not feasible, but this range works |

118 | // quite well |

119 | let (a, b) = (data.min() - 5. * h, data.max() + 5. * h); |

120 | |

121 | let mut acc = 0.; |

122 | let mut x = a; |

123 | let mut y = kde.estimate(a); |

124 | |

125 | while x < b { |

126 | acc += DX * y / 2.; |

127 | |

128 | x += DX; |

129 | y = kde.estimate(x); |

130 | |

131 | acc += DX * y / 2.; |

132 | } |

133 | |

134 | TestResult::from_bool(relative_eq!(acc, 1., epsilon = 2e-5)) |

135 | } else { |

136 | TestResult::discard() |

137 | } |

138 | } |

139 | } |

140 | } |

141 | }; |

142 | } |

143 | |

144 | #[cfg(test)] |

145 | mod test { |

146 | test!(f32); |

147 | test!(f64); |

148 | } |

149 |