| 1 | //! Bivariate analysis |
| 2 | |
| 3 | mod bootstrap; |
| 4 | pub mod regression; |
| 5 | mod resamples; |
| 6 | |
| 7 | use crate::stats::bivariate::resamples::Resamples; |
| 8 | use crate::stats::float::Float; |
| 9 | use crate::stats::tuple::{Tuple, TupledDistributionsBuilder}; |
| 10 | use crate::stats::univariate::Sample; |
| 11 | #[cfg (feature = "rayon" )] |
| 12 | use rayon::iter::{IntoParallelIterator, ParallelIterator}; |
| 13 | |
| 14 | /// Bivariate `(X, Y)` data |
| 15 | /// |
| 16 | /// Invariants: |
| 17 | /// |
| 18 | /// - No `NaN`s in the data |
| 19 | /// - At least two data points in the set |
| 20 | pub struct Data<'a, X, Y>(&'a [X], &'a [Y]); |
| 21 | |
| 22 | impl<'a, X, Y> Copy for Data<'a, X, Y> {} |
| 23 | |
| 24 | #[cfg_attr (feature = "cargo-clippy" , allow(clippy::expl_impl_clone_on_copy))] |
| 25 | impl<'a, X, Y> Clone for Data<'a, X, Y> { |
| 26 | fn clone(&self) -> Data<'a, X, Y> { |
| 27 | *self |
| 28 | } |
| 29 | } |
| 30 | |
| 31 | impl<'a, X, Y> Data<'a, X, Y> { |
| 32 | /// Returns the length of the data set |
| 33 | pub fn len(&self) -> usize { |
| 34 | self.0.len() |
| 35 | } |
| 36 | |
| 37 | /// Iterate over the data set |
| 38 | pub fn iter(&self) -> Pairs<'a, X, Y> { |
| 39 | Pairs { |
| 40 | data: *self, |
| 41 | state: 0, |
| 42 | } |
| 43 | } |
| 44 | } |
| 45 | |
| 46 | impl<'a, X, Y> Data<'a, X, Y> |
| 47 | where |
| 48 | X: Float, |
| 49 | Y: Float, |
| 50 | { |
| 51 | /// Creates a new data set from two existing slices |
| 52 | pub fn new(xs: &'a [X], ys: &'a [Y]) -> Data<'a, X, Y> { |
| 53 | assert!( |
| 54 | xs.len() == ys.len() |
| 55 | && xs.len() > 1 |
| 56 | && xs.iter().all(|x| !x.is_nan()) |
| 57 | && ys.iter().all(|y| !y.is_nan()) |
| 58 | ); |
| 59 | |
| 60 | Data(xs, ys) |
| 61 | } |
| 62 | |
| 63 | // TODO Remove the `T` parameter in favor of `S::Output` |
| 64 | /// Returns the bootstrap distributions of the parameters estimated by the `statistic` |
| 65 | /// |
| 66 | /// - Multi-threaded |
| 67 | /// - Time: `O(nresamples)` |
| 68 | /// - Memory: `O(nresamples)` |
| 69 | pub fn bootstrap<T, S>(&self, nresamples: usize, statistic: S) -> T::Distributions |
| 70 | where |
| 71 | S: Fn(Data<X, Y>) -> T + Sync, |
| 72 | T: Tuple + Send, |
| 73 | T::Distributions: Send, |
| 74 | T::Builder: Send, |
| 75 | { |
| 76 | #[cfg (feature = "rayon" )] |
| 77 | { |
| 78 | (0..nresamples) |
| 79 | .into_par_iter() |
| 80 | .map_init( |
| 81 | || Resamples::new(*self), |
| 82 | |resamples, _| statistic(resamples.next()), |
| 83 | ) |
| 84 | .fold( |
| 85 | || T::Builder::new(0), |
| 86 | |mut sub_distributions, sample| { |
| 87 | sub_distributions.push(sample); |
| 88 | sub_distributions |
| 89 | }, |
| 90 | ) |
| 91 | .reduce( |
| 92 | || T::Builder::new(0), |
| 93 | |mut a, mut b| { |
| 94 | a.extend(&mut b); |
| 95 | a |
| 96 | }, |
| 97 | ) |
| 98 | .complete() |
| 99 | } |
| 100 | #[cfg (not(feature = "rayon" ))] |
| 101 | { |
| 102 | let mut resamples = Resamples::new(*self); |
| 103 | (0..nresamples) |
| 104 | .map(|_| statistic(resamples.next())) |
| 105 | .fold(T::Builder::new(0), |mut sub_distributions, sample| { |
| 106 | sub_distributions.push(sample); |
| 107 | sub_distributions |
| 108 | }) |
| 109 | .complete() |
| 110 | } |
| 111 | } |
| 112 | |
| 113 | /// Returns a view into the `X` data |
| 114 | pub fn x(&self) -> &'a Sample<X> { |
| 115 | Sample::new(self.0) |
| 116 | } |
| 117 | |
| 118 | /// Returns a view into the `Y` data |
| 119 | pub fn y(&self) -> &'a Sample<Y> { |
| 120 | Sample::new(self.1) |
| 121 | } |
| 122 | } |
| 123 | |
| 124 | /// Iterator over `Data` |
| 125 | pub struct Pairs<'a, X: 'a, Y: 'a> { |
| 126 | data: Data<'a, X, Y>, |
| 127 | state: usize, |
| 128 | } |
| 129 | |
| 130 | impl<'a, X, Y> Iterator for Pairs<'a, X, Y> { |
| 131 | type Item = (&'a X, &'a Y); |
| 132 | |
| 133 | fn next(&mut self) -> Option<(&'a X, &'a Y)> { |
| 134 | if self.state < self.data.len() { |
| 135 | let i = self.state; |
| 136 | self.state += 1; |
| 137 | |
| 138 | // This is safe because i will always be < self.data.{0,1}.len() |
| 139 | debug_assert!(i < self.data.0.len()); |
| 140 | debug_assert!(i < self.data.1.len()); |
| 141 | unsafe { Some((self.data.0.get_unchecked(i), self.data.1.get_unchecked(i))) } |
| 142 | } else { |
| 143 | None |
| 144 | } |
| 145 | } |
| 146 | } |
| 147 | |