1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // median.hpp |
3 | // |
4 | // Copyright 2006 Eric Niebler, Olivier Gygi. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #ifndef BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005 |
9 | #define BOOST_ACCUMULATORS_STATISTICS_MEDIAN_HPP_EAN_28_10_2005 |
10 | |
11 | #include <boost/mpl/placeholders.hpp> |
12 | #include <boost/range/iterator_range.hpp> |
13 | #include <boost/accumulators/framework/accumulator_base.hpp> |
14 | #include <boost/accumulators/framework/extractor.hpp> |
15 | #include <boost/accumulators/numeric/functional.hpp> |
16 | #include <boost/accumulators/framework/parameters/sample.hpp> |
17 | #include <boost/accumulators/framework/depends_on.hpp> |
18 | #include <boost/accumulators/statistics_fwd.hpp> |
19 | #include <boost/accumulators/statistics/count.hpp> |
20 | #include <boost/accumulators/statistics/p_square_quantile.hpp> |
21 | #include <boost/accumulators/statistics/density.hpp> |
22 | #include <boost/accumulators/statistics/p_square_cumul_dist.hpp> |
23 | |
24 | namespace boost { namespace accumulators |
25 | { |
26 | |
27 | namespace impl |
28 | { |
29 | /////////////////////////////////////////////////////////////////////////////// |
30 | // median_impl |
31 | // |
32 | /** |
33 | @brief Median estimation based on the \f$P^2\f$ quantile estimator |
34 | |
35 | The \f$P^2\f$ algorithm is invoked with a quantile probability of 0.5. |
36 | */ |
37 | template<typename Sample> |
38 | struct median_impl |
39 | : accumulator_base |
40 | { |
41 | // for boost::result_of |
42 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type result_type; |
43 | |
44 | median_impl(dont_care) {} |
45 | |
46 | template<typename Args> |
47 | result_type result(Args const &args) const |
48 | { |
49 | return p_square_quantile_for_median(args); |
50 | } |
51 | |
52 | // serialization is done by accumulators it depends on |
53 | template<class Archive> |
54 | void serialize(Archive & ar, const unsigned int file_version) {} |
55 | }; |
56 | /////////////////////////////////////////////////////////////////////////////// |
57 | // with_density_median_impl |
58 | // |
59 | /** |
60 | @brief Median estimation based on the density estimator |
61 | |
62 | The algorithm determines the bin in which the \f$0.5*cnt\f$-th sample lies, \f$cnt\f$ being |
63 | the total number of samples. It returns the approximate horizontal position of this sample, |
64 | based on a linear interpolation inside the bin. |
65 | */ |
66 | template<typename Sample> |
67 | struct with_density_median_impl |
68 | : accumulator_base |
69 | { |
70 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
71 | typedef std::vector<std::pair<float_type, float_type> > histogram_type; |
72 | typedef iterator_range<typename histogram_type::iterator> range_type; |
73 | // for boost::result_of |
74 | typedef float_type result_type; |
75 | |
76 | template<typename Args> |
77 | with_density_median_impl(Args const &args) |
78 | : sum(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
79 | , is_dirty(true) |
80 | { |
81 | } |
82 | |
83 | void operator ()(dont_care) |
84 | { |
85 | this->is_dirty = true; |
86 | } |
87 | |
88 | |
89 | template<typename Args> |
90 | result_type result(Args const &args) const |
91 | { |
92 | if (this->is_dirty) |
93 | { |
94 | this->is_dirty = false; |
95 | |
96 | std::size_t cnt = count(args); |
97 | range_type histogram = density(args); |
98 | typename range_type::iterator it = histogram.begin(); |
99 | while (this->sum < 0.5 * cnt) |
100 | { |
101 | this->sum += it->second * cnt; |
102 | ++it; |
103 | } |
104 | --it; |
105 | float_type over = numeric::fdiv(this->sum - 0.5 * cnt, it->second * cnt); |
106 | this->median = it->first * over + (it + 1)->first * (1. - over); |
107 | } |
108 | |
109 | return this->median; |
110 | } |
111 | |
112 | // make this accumulator serializeable |
113 | template<class Archive> |
114 | void serialize(Archive & ar, const unsigned int file_version) |
115 | { |
116 | ar & sum; |
117 | ar & is_dirty; |
118 | ar & median; |
119 | } |
120 | |
121 | |
122 | private: |
123 | mutable float_type sum; |
124 | mutable bool is_dirty; |
125 | mutable float_type median; |
126 | }; |
127 | |
128 | /////////////////////////////////////////////////////////////////////////////// |
129 | // with_p_square_cumulative_distribution_median_impl |
130 | // |
131 | /** |
132 | @brief Median estimation based on the \f$P^2\f$ cumulative distribution estimator |
133 | |
134 | The algorithm determines the first (leftmost) bin with a height exceeding 0.5. It |
135 | returns the approximate horizontal position of where the cumulative distribution |
136 | equals 0.5, based on a linear interpolation inside the bin. |
137 | */ |
138 | template<typename Sample> |
139 | struct with_p_square_cumulative_distribution_median_impl |
140 | : accumulator_base |
141 | { |
142 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
143 | typedef std::vector<std::pair<float_type, float_type> > histogram_type; |
144 | typedef iterator_range<typename histogram_type::iterator> range_type; |
145 | // for boost::result_of |
146 | typedef float_type result_type; |
147 | |
148 | with_p_square_cumulative_distribution_median_impl(dont_care) |
149 | : is_dirty(true) |
150 | { |
151 | } |
152 | |
153 | void operator ()(dont_care) |
154 | { |
155 | this->is_dirty = true; |
156 | } |
157 | |
158 | template<typename Args> |
159 | result_type result(Args const &args) const |
160 | { |
161 | if (this->is_dirty) |
162 | { |
163 | this->is_dirty = false; |
164 | |
165 | range_type histogram = p_square_cumulative_distribution(args); |
166 | typename range_type::iterator it = histogram.begin(); |
167 | while (it->second < 0.5) |
168 | { |
169 | ++it; |
170 | } |
171 | float_type over = numeric::fdiv(it->second - 0.5, it->second - (it - 1)->second); |
172 | this->median = it->first * over + (it + 1)->first * ( 1. - over ); |
173 | } |
174 | |
175 | return this->median; |
176 | } |
177 | |
178 | // make this accumulator serializeable |
179 | template<class Archive> |
180 | void serialize(Archive & ar, const unsigned int file_version) |
181 | { |
182 | ar & is_dirty; |
183 | ar & median; |
184 | } |
185 | |
186 | private: |
187 | |
188 | mutable bool is_dirty; |
189 | mutable float_type median; |
190 | }; |
191 | |
192 | } // namespace impl |
193 | |
194 | /////////////////////////////////////////////////////////////////////////////// |
195 | // tag::median |
196 | // tag::with_densisty_median |
197 | // tag::with_p_square_cumulative_distribution_median |
198 | // |
199 | namespace tag |
200 | { |
201 | struct median |
202 | : depends_on<p_square_quantile_for_median> |
203 | { |
204 | /// INTERNAL ONLY |
205 | /// |
206 | typedef accumulators::impl::median_impl<mpl::_1> impl; |
207 | }; |
208 | struct with_density_median |
209 | : depends_on<count, density> |
210 | { |
211 | /// INTERNAL ONLY |
212 | /// |
213 | typedef accumulators::impl::with_density_median_impl<mpl::_1> impl; |
214 | }; |
215 | struct with_p_square_cumulative_distribution_median |
216 | : depends_on<p_square_cumulative_distribution> |
217 | { |
218 | /// INTERNAL ONLY |
219 | /// |
220 | typedef accumulators::impl::with_p_square_cumulative_distribution_median_impl<mpl::_1> impl; |
221 | }; |
222 | } |
223 | |
224 | /////////////////////////////////////////////////////////////////////////////// |
225 | // extract::median |
226 | // extract::with_density_median |
227 | // extract::with_p_square_cumulative_distribution_median |
228 | // |
229 | namespace extract |
230 | { |
231 | extractor<tag::median> const = {}; |
232 | extractor<tag::with_density_median> const = {}; |
233 | extractor<tag::with_p_square_cumulative_distribution_median> const = {}; |
234 | |
235 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(median) |
236 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_density_median) |
237 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(with_p_square_cumulative_distribution_median) |
238 | } |
239 | |
240 | using extract::median; |
241 | using extract::with_density_median; |
242 | using extract::with_p_square_cumulative_distribution_median; |
243 | |
244 | // median(with_p_square_quantile) -> median |
245 | template<> |
246 | struct as_feature<tag::median(with_p_square_quantile)> |
247 | { |
248 | typedef tag::median type; |
249 | }; |
250 | |
251 | // median(with_density) -> with_density_median |
252 | template<> |
253 | struct as_feature<tag::median(with_density)> |
254 | { |
255 | typedef tag::with_density_median type; |
256 | }; |
257 | |
258 | // median(with_p_square_cumulative_distribution) -> with_p_square_cumulative_distribution_median |
259 | template<> |
260 | struct as_feature<tag::median(with_p_square_cumulative_distribution)> |
261 | { |
262 | typedef tag::with_p_square_cumulative_distribution_median type; |
263 | }; |
264 | |
265 | // for the purposes of feature-based dependency resolution, |
266 | // with_density_median and with_p_square_cumulative_distribution_median |
267 | // provide the same feature as median |
268 | template<> |
269 | struct feature_of<tag::with_density_median> |
270 | : feature_of<tag::median> |
271 | { |
272 | }; |
273 | |
274 | template<> |
275 | struct feature_of<tag::with_p_square_cumulative_distribution_median> |
276 | : feature_of<tag::median> |
277 | { |
278 | }; |
279 | |
280 | // So that median can be automatically substituted with |
281 | // weighted_median when the weight parameter is non-void. |
282 | template<> |
283 | struct as_weighted_feature<tag::median> |
284 | { |
285 | typedef tag::weighted_median type; |
286 | }; |
287 | |
288 | template<> |
289 | struct feature_of<tag::weighted_median> |
290 | : feature_of<tag::median> |
291 | { |
292 | }; |
293 | |
294 | // So that with_density_median can be automatically substituted with |
295 | // with_density_weighted_median when the weight parameter is non-void. |
296 | template<> |
297 | struct as_weighted_feature<tag::with_density_median> |
298 | { |
299 | typedef tag::with_density_weighted_median type; |
300 | }; |
301 | |
302 | template<> |
303 | struct feature_of<tag::with_density_weighted_median> |
304 | : feature_of<tag::with_density_median> |
305 | { |
306 | }; |
307 | |
308 | // So that with_p_square_cumulative_distribution_median can be automatically substituted with |
309 | // with_p_square_cumulative_distribution_weighted_median when the weight parameter is non-void. |
310 | template<> |
311 | struct as_weighted_feature<tag::with_p_square_cumulative_distribution_median> |
312 | { |
313 | typedef tag::with_p_square_cumulative_distribution_weighted_median type; |
314 | }; |
315 | |
316 | template<> |
317 | struct feature_of<tag::with_p_square_cumulative_distribution_weighted_median> |
318 | : feature_of<tag::with_p_square_cumulative_distribution_median> |
319 | { |
320 | }; |
321 | |
322 | }} // namespace boost::accumulators |
323 | |
324 | #endif |
325 | |