1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // p_square_cumulative_distribution.hpp |
3 | // |
4 | // Copyright 2005 Daniel Egloff, Olivier Gygi. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006 |
9 | #define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_CUMUL_DIST_HPP_DE_01_01_2006 |
10 | |
11 | #include <vector> |
12 | #include <functional> |
13 | #include <boost/parameter/keyword.hpp> |
14 | #include <boost/range.hpp> |
15 | #include <boost/mpl/placeholders.hpp> |
16 | #include <boost/accumulators/accumulators_fwd.hpp> |
17 | #include <boost/accumulators/framework/accumulator_base.hpp> |
18 | #include <boost/accumulators/framework/extractor.hpp> |
19 | #include <boost/accumulators/numeric/functional.hpp> |
20 | #include <boost/accumulators/framework/parameters/sample.hpp> |
21 | #include <boost/accumulators/statistics_fwd.hpp> |
22 | #include <boost/accumulators/statistics/count.hpp> |
23 | #include <boost/serialization/vector.hpp> |
24 | #include <boost/serialization/utility.hpp> |
25 | |
26 | namespace boost { namespace accumulators |
27 | { |
28 | /////////////////////////////////////////////////////////////////////////////// |
29 | // num_cells named parameter |
30 | // |
31 | BOOST_PARAMETER_NESTED_KEYWORD(tag, p_square_cumulative_distribution_num_cells, num_cells) |
32 | |
33 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_cumulative_distribution_num_cells) |
34 | |
35 | namespace impl |
36 | { |
37 | /////////////////////////////////////////////////////////////////////////////// |
38 | // p_square_cumulative_distribution_impl |
39 | // cumulative_distribution calculation (as histogram) |
40 | /** |
41 | @brief Histogram calculation of the cumulative distribution with the \f$P^2\f$ algorithm |
42 | |
43 | A histogram of the sample cumulative distribution is computed dynamically without storing samples |
44 | based on the \f$ P^2 \f$ algorithm. The returned histogram has a specifiable amount (num_cells) |
45 | equiprobable (and not equal-sized) cells. |
46 | |
47 | For further details, see |
48 | |
49 | R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and |
50 | histograms without storing observations, Communications of the ACM, |
51 | Volume 28 (October), Number 10, 1985, p. 1076-1085. |
52 | |
53 | @param p_square_cumulative_distribution_num_cells. |
54 | */ |
55 | template<typename Sample> |
56 | struct p_square_cumulative_distribution_impl |
57 | : accumulator_base |
58 | { |
59 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
60 | typedef std::vector<float_type> array_type; |
61 | typedef std::vector<std::pair<float_type, float_type> > histogram_type; |
62 | // for boost::result_of |
63 | typedef iterator_range<typename histogram_type::iterator> result_type; |
64 | |
65 | template<typename Args> |
66 | p_square_cumulative_distribution_impl(Args const &args) |
67 | : num_cells(args[p_square_cumulative_distribution_num_cells]) |
68 | , heights(num_cells + 1) |
69 | , actual_positions(num_cells + 1) |
70 | , desired_positions(num_cells + 1) |
71 | , positions_increments(num_cells + 1) |
72 | , histogram(num_cells + 1) |
73 | , is_dirty(true) |
74 | { |
75 | std::size_t b = this->num_cells; |
76 | |
77 | for (std::size_t i = 0; i < b + 1; ++i) |
78 | { |
79 | this->actual_positions[i] = i + 1.; |
80 | this->desired_positions[i] = i + 1.; |
81 | this->positions_increments[i] = numeric::fdiv(i, b); |
82 | } |
83 | } |
84 | |
85 | template<typename Args> |
86 | void operator ()(Args const &args) |
87 | { |
88 | this->is_dirty = true; |
89 | |
90 | std::size_t cnt = count(args); |
91 | std::size_t sample_cell = 1; // k |
92 | std::size_t b = this->num_cells; |
93 | |
94 | // accumulate num_cells + 1 first samples |
95 | if (cnt <= b + 1) |
96 | { |
97 | this->heights[cnt - 1] = args[sample]; |
98 | |
99 | // complete the initialization of heights by sorting |
100 | if (cnt == b + 1) |
101 | { |
102 | std::sort(this->heights.begin(), this->heights.end()); |
103 | } |
104 | } |
105 | else |
106 | { |
107 | // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values |
108 | if (args[sample] < this->heights[0]) |
109 | { |
110 | this->heights[0] = args[sample]; |
111 | sample_cell = 1; |
112 | } |
113 | else if (this->heights[b] <= args[sample]) |
114 | { |
115 | this->heights[b] = args[sample]; |
116 | sample_cell = b; |
117 | } |
118 | else |
119 | { |
120 | typename array_type::iterator it; |
121 | it = std::upper_bound( |
122 | this->heights.begin() |
123 | , this->heights.end() |
124 | , args[sample] |
125 | ); |
126 | |
127 | sample_cell = std::distance(this->heights.begin(), it); |
128 | } |
129 | |
130 | // increment positions of markers above sample_cell |
131 | for (std::size_t i = sample_cell; i < b + 1; ++i) |
132 | { |
133 | ++this->actual_positions[i]; |
134 | } |
135 | |
136 | // update desired position of markers 2 to num_cells + 1 |
137 | // (desired position of first marker is always 1) |
138 | for (std::size_t i = 1; i < b + 1; ++i) |
139 | { |
140 | this->desired_positions[i] += this->positions_increments[i]; |
141 | } |
142 | |
143 | // adjust heights of markers 2 to num_cells if necessary |
144 | for (std::size_t i = 1; i < b; ++i) |
145 | { |
146 | // offset to desire position |
147 | float_type d = this->desired_positions[i] - this->actual_positions[i]; |
148 | |
149 | // offset to next position |
150 | float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; |
151 | |
152 | // offset to previous position |
153 | float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; |
154 | |
155 | // height ds |
156 | float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; |
157 | float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; |
158 | |
159 | if ( ( d >= 1. && dp > 1. ) || ( d <= -1. && dm < -1. ) ) |
160 | { |
161 | short sign_d = static_cast<short>(d / std::abs(d)); |
162 | |
163 | // try adjusting heights[i] using p-squared formula |
164 | float_type h = this->heights[i] + sign_d / (dp - dm) * ( (sign_d - dm) * hp + (dp - sign_d) * hm ); |
165 | |
166 | if ( this->heights[i - 1] < h && h < this->heights[i + 1] ) |
167 | { |
168 | this->heights[i] = h; |
169 | } |
170 | else |
171 | { |
172 | // use linear formula |
173 | if (d>0) |
174 | { |
175 | this->heights[i] += hp; |
176 | } |
177 | if (d<0) |
178 | { |
179 | this->heights[i] -= hm; |
180 | } |
181 | } |
182 | this->actual_positions[i] += sign_d; |
183 | } |
184 | } |
185 | } |
186 | } |
187 | |
188 | template<typename Args> |
189 | result_type result(Args const &args) const |
190 | { |
191 | if (this->is_dirty) |
192 | { |
193 | this->is_dirty = false; |
194 | |
195 | // creates a vector of std::pair where each pair i holds |
196 | // the values heights[i] (x-axis of histogram) and |
197 | // actual_positions[i] / cnt (y-axis of histogram) |
198 | |
199 | std::size_t cnt = count(args); |
200 | |
201 | for (std::size_t i = 0; i < this->histogram.size(); ++i) |
202 | { |
203 | this->histogram[i] = std::make_pair(this->heights[i], numeric::fdiv(this->actual_positions[i], cnt)); |
204 | } |
205 | } |
206 | //return histogram; |
207 | return make_iterator_range(this->histogram); |
208 | } |
209 | |
210 | // make this accumulator serializeable |
211 | // TODO split to save/load and check on parameters provided in ctor |
212 | template<class Archive> |
213 | void serialize(Archive & ar, const unsigned int file_version) |
214 | { |
215 | ar & num_cells; |
216 | ar & heights; |
217 | ar & actual_positions; |
218 | ar & desired_positions; |
219 | ar & positions_increments; |
220 | ar & histogram; |
221 | ar & is_dirty; |
222 | } |
223 | |
224 | private: |
225 | std::size_t num_cells; // number of cells b |
226 | array_type heights; // q_i |
227 | array_type actual_positions; // n_i |
228 | array_type desired_positions; // n'_i |
229 | array_type positions_increments; // dn'_i |
230 | mutable histogram_type histogram; // histogram |
231 | mutable bool is_dirty; |
232 | }; |
233 | |
234 | } // namespace detail |
235 | |
236 | /////////////////////////////////////////////////////////////////////////////// |
237 | // tag::p_square_cumulative_distribution |
238 | // |
239 | namespace tag |
240 | { |
241 | struct p_square_cumulative_distribution |
242 | : depends_on<count> |
243 | , p_square_cumulative_distribution_num_cells |
244 | { |
245 | /// INTERNAL ONLY |
246 | /// |
247 | typedef accumulators::impl::p_square_cumulative_distribution_impl<mpl::_1> impl; |
248 | }; |
249 | } |
250 | |
251 | /////////////////////////////////////////////////////////////////////////////// |
252 | // extract::p_square_cumulative_distribution |
253 | // |
254 | namespace extract |
255 | { |
256 | extractor<tag::p_square_cumulative_distribution> const = {}; |
257 | |
258 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_cumulative_distribution) |
259 | } |
260 | |
261 | using extract::p_square_cumulative_distribution; |
262 | |
263 | // So that p_square_cumulative_distribution can be automatically substituted with |
264 | // weighted_p_square_cumulative_distribution when the weight parameter is non-void |
265 | template<> |
266 | struct as_weighted_feature<tag::p_square_cumulative_distribution> |
267 | { |
268 | typedef tag::weighted_p_square_cumulative_distribution type; |
269 | }; |
270 | |
271 | template<> |
272 | struct feature_of<tag::weighted_p_square_cumulative_distribution> |
273 | : feature_of<tag::p_square_cumulative_distribution> |
274 | { |
275 | }; |
276 | |
277 | }} // namespace boost::accumulators |
278 | |
279 | #endif |
280 | |