1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // p_square_quantile.hpp |
3 | // |
4 | // Copyright 2005 Daniel Egloff. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #ifndef BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 |
9 | #define BOOST_ACCUMULATORS_STATISTICS_P_SQUARE_QUANTILE_HPP_DE_01_01_2006 |
10 | |
11 | #include <cmath> |
12 | #include <functional> |
13 | #include <boost/array.hpp> |
14 | #include <boost/mpl/placeholders.hpp> |
15 | #include <boost/type_traits/is_same.hpp> |
16 | #include <boost/parameter/keyword.hpp> |
17 | #include <boost/accumulators/framework/accumulator_base.hpp> |
18 | #include <boost/accumulators/framework/extractor.hpp> |
19 | #include <boost/accumulators/numeric/functional.hpp> |
20 | #include <boost/accumulators/framework/parameters/sample.hpp> |
21 | #include <boost/accumulators/framework/depends_on.hpp> |
22 | #include <boost/accumulators/statistics_fwd.hpp> |
23 | #include <boost/accumulators/statistics/count.hpp> |
24 | #include <boost/accumulators/statistics/parameters/quantile_probability.hpp> |
25 | #include <boost/serialization/boost_array.hpp> |
26 | |
27 | namespace boost { namespace accumulators |
28 | { |
29 | |
30 | namespace impl |
31 | { |
32 | /////////////////////////////////////////////////////////////////////////////// |
33 | // p_square_quantile_impl |
34 | // single quantile estimation |
35 | /** |
36 | @brief Single quantile estimation with the \f$P^2\f$ algorithm |
37 | |
38 | The \f$P^2\f$ algorithm estimates a quantile dynamically without storing samples. Instead of |
39 | storing the whole sample cumulative distribution, only five points (markers) are stored. The heights |
40 | of these markers are the minimum and the maximum of the samples and the current estimates of the |
41 | \f$(p/2)\f$-, \f$p\f$- and \f$(1+p)/2\f$-quantiles. Their positions are equal to the number |
42 | of samples that are smaller or equal to the markers. Each time a new samples is recorded, the |
43 | positions of the markers are updated and if necessary their heights are adjusted using a piecewise- |
44 | parabolic formula. |
45 | |
46 | For further details, see |
47 | |
48 | R. Jain and I. Chlamtac, The P^2 algorithm for dynamic calculation of quantiles and |
49 | histograms without storing observations, Communications of the ACM, |
50 | Volume 28 (October), Number 10, 1985, p. 1076-1085. |
51 | |
52 | @param quantile_probability |
53 | */ |
54 | template<typename Sample, typename Impl> |
55 | struct p_square_quantile_impl |
56 | : accumulator_base |
57 | { |
58 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
59 | typedef array<float_type, 5> array_type; |
60 | // for boost::result_of |
61 | typedef float_type result_type; |
62 | |
63 | template<typename Args> |
64 | p_square_quantile_impl(Args const &args) |
65 | : p(is_same<Impl, for_median>::value ? float_type(0.5) : args[quantile_probability | float_type(0.5)]) |
66 | , heights() |
67 | , actual_positions() |
68 | , desired_positions() |
69 | , positions_increments() |
70 | { |
71 | for(std::size_t i = 0; i < 5; ++i) |
72 | { |
73 | this->actual_positions[i] = i + float_type(1.); |
74 | } |
75 | |
76 | this->desired_positions[0] = float_type(1.); |
77 | this->desired_positions[1] = float_type(1.) + float_type(2.) * this->p; |
78 | this->desired_positions[2] = float_type(1.) + float_type(4.) * this->p; |
79 | this->desired_positions[3] = float_type(3.) + float_type(2.) * this->p; |
80 | this->desired_positions[4] = float_type(5.); |
81 | |
82 | |
83 | this->positions_increments[0] = float_type(0.); |
84 | this->positions_increments[1] = this->p / float_type(2.); |
85 | this->positions_increments[2] = this->p; |
86 | this->positions_increments[3] = (float_type(1.) + this->p) / float_type(2.); |
87 | this->positions_increments[4] = float_type(1.); |
88 | } |
89 | |
90 | template<typename Args> |
91 | void operator ()(Args const &args) |
92 | { |
93 | std::size_t cnt = count(args); |
94 | |
95 | // accumulate 5 first samples |
96 | if(cnt <= 5) |
97 | { |
98 | this->heights[cnt - 1] = args[sample]; |
99 | |
100 | // complete the initialization of heights by sorting |
101 | if(cnt == 5) |
102 | { |
103 | std::sort(this->heights.begin(), this->heights.end()); |
104 | } |
105 | } |
106 | else |
107 | { |
108 | std::size_t sample_cell = 1; // k |
109 | |
110 | // find cell k such that heights[k-1] <= args[sample] < heights[k] and adjust extreme values |
111 | if (args[sample] < this->heights[0]) |
112 | { |
113 | this->heights[0] = args[sample]; |
114 | sample_cell = 1; |
115 | } |
116 | else if (this->heights[4] <= args[sample]) |
117 | { |
118 | this->heights[4] = args[sample]; |
119 | sample_cell = 4; |
120 | } |
121 | else |
122 | { |
123 | typedef typename array_type::iterator iterator; |
124 | iterator it = std::upper_bound( |
125 | this->heights.begin() |
126 | , this->heights.end() |
127 | , args[sample] |
128 | ); |
129 | |
130 | sample_cell = std::distance(this->heights.begin(), it); |
131 | } |
132 | |
133 | // update positions of markers above sample_cell |
134 | for(std::size_t i = sample_cell; i < 5; ++i) |
135 | { |
136 | ++this->actual_positions[i]; |
137 | } |
138 | |
139 | // update desired positions of all markers |
140 | for(std::size_t i = 0; i < 5; ++i) |
141 | { |
142 | this->desired_positions[i] += this->positions_increments[i]; |
143 | } |
144 | |
145 | // adjust heights and actual positions of markers 1 to 3 if necessary |
146 | for(std::size_t i = 1; i <= 3; ++i) |
147 | { |
148 | // offset to desired positions |
149 | float_type d = this->desired_positions[i] - this->actual_positions[i]; |
150 | |
151 | // offset to next position |
152 | float_type dp = this->actual_positions[i + 1] - this->actual_positions[i]; |
153 | |
154 | // offset to previous position |
155 | float_type dm = this->actual_positions[i - 1] - this->actual_positions[i]; |
156 | |
157 | // height ds |
158 | float_type hp = (this->heights[i + 1] - this->heights[i]) / dp; |
159 | float_type hm = (this->heights[i - 1] - this->heights[i]) / dm; |
160 | |
161 | if((d >= float_type(1.) && dp > float_type(1.)) || (d <= float_type(-1.) && dm < float_type(-1.))) |
162 | { |
163 | short sign_d = static_cast<short>(d / std::abs(d)); |
164 | |
165 | // try adjusting heights[i] using p-squared formula |
166 | float_type h = this->heights[i] + sign_d / (dp - dm) * ((sign_d - dm) * hp |
167 | + (dp - sign_d) * hm); |
168 | |
169 | if(this->heights[i - 1] < h && h < this->heights[i + 1]) |
170 | { |
171 | this->heights[i] = h; |
172 | } |
173 | else |
174 | { |
175 | // use linear formula |
176 | if(d > float_type(0)) |
177 | { |
178 | this->heights[i] += hp; |
179 | } |
180 | if(d < float_type(0)) |
181 | { |
182 | this->heights[i] -= hm; |
183 | } |
184 | } |
185 | this->actual_positions[i] += sign_d; |
186 | } |
187 | } |
188 | } |
189 | } |
190 | |
191 | result_type result(dont_care) const |
192 | { |
193 | return this->heights[2]; |
194 | } |
195 | |
196 | // make this accumulator serializeable |
197 | // TODO: do we need to split to load/save and verify that P did not change? |
198 | template<class Archive> |
199 | void serialize(Archive & ar, const unsigned int file_version) |
200 | { |
201 | ar & p; |
202 | ar & heights; |
203 | ar & actual_positions; |
204 | ar & desired_positions; |
205 | ar & positions_increments; |
206 | } |
207 | |
208 | private: |
209 | float_type p; // the quantile probability p |
210 | array_type heights; // q_i |
211 | array_type actual_positions; // n_i |
212 | array_type desired_positions; // n'_i |
213 | array_type positions_increments; // dn'_i |
214 | }; |
215 | |
216 | } // namespace detail |
217 | |
218 | /////////////////////////////////////////////////////////////////////////////// |
219 | // tag::p_square_quantile |
220 | // |
221 | namespace tag |
222 | { |
223 | struct p_square_quantile |
224 | : depends_on<count> |
225 | { |
226 | /// INTERNAL ONLY |
227 | /// |
228 | typedef accumulators::impl::p_square_quantile_impl<mpl::_1, regular> impl; |
229 | }; |
230 | struct p_square_quantile_for_median |
231 | : depends_on<count> |
232 | { |
233 | /// INTERNAL ONLY |
234 | /// |
235 | typedef accumulators::impl::p_square_quantile_impl<mpl::_1, for_median> impl; |
236 | }; |
237 | } |
238 | |
239 | /////////////////////////////////////////////////////////////////////////////// |
240 | // extract::p_square_quantile |
241 | // extract::p_square_quantile_for_median |
242 | // |
243 | namespace extract |
244 | { |
245 | extractor<tag::p_square_quantile> const = {}; |
246 | extractor<tag::p_square_quantile_for_median> const = {}; |
247 | |
248 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile) |
249 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(p_square_quantile_for_median) |
250 | } |
251 | |
252 | using extract::p_square_quantile; |
253 | using extract::p_square_quantile_for_median; |
254 | |
255 | // So that p_square_quantile can be automatically substituted with |
256 | // weighted_p_square_quantile when the weight parameter is non-void |
257 | template<> |
258 | struct as_weighted_feature<tag::p_square_quantile> |
259 | { |
260 | typedef tag::weighted_p_square_quantile type; |
261 | }; |
262 | |
263 | template<> |
264 | struct feature_of<tag::weighted_p_square_quantile> |
265 | : feature_of<tag::p_square_quantile> |
266 | { |
267 | }; |
268 | |
269 | }} // namespace boost::accumulators |
270 | |
271 | #endif |
272 | |