1 | /////////////////////////////////////////////////////////////////////////////// |
2 | // peaks_over_threshold.hpp |
3 | // |
4 | // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost |
5 | // Software License, Version 1.0. (See accompanying file |
6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
7 | |
8 | #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 |
9 | #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 |
10 | |
11 | #include <vector> |
12 | #include <limits> |
13 | #include <numeric> |
14 | #include <functional> |
15 | #include <boost/config/no_tr1/cmath.hpp> // pow |
16 | #include <sstream> // stringstream |
17 | #include <stdexcept> // runtime_error |
18 | #include <boost/throw_exception.hpp> |
19 | #include <boost/range.hpp> |
20 | #include <boost/mpl/if.hpp> |
21 | #include <boost/mpl/int.hpp> |
22 | #include <boost/mpl/placeholders.hpp> |
23 | #include <boost/parameter/keyword.hpp> |
24 | #include <boost/tuple/tuple.hpp> |
25 | #include <boost/accumulators/accumulators_fwd.hpp> |
26 | #include <boost/accumulators/framework/accumulator_base.hpp> |
27 | #include <boost/accumulators/framework/extractor.hpp> |
28 | #include <boost/accumulators/numeric/functional.hpp> |
29 | #include <boost/accumulators/framework/parameters/sample.hpp> |
30 | #include <boost/accumulators/framework/depends_on.hpp> |
31 | #include <boost/accumulators/statistics_fwd.hpp> |
32 | #include <boost/accumulators/statistics/parameters/quantile_probability.hpp> |
33 | #include <boost/accumulators/statistics/count.hpp> |
34 | #include <boost/accumulators/statistics/tail.hpp> |
35 | |
36 | #ifdef _MSC_VER |
37 | # pragma warning(push) |
38 | # pragma warning(disable: 4127) // conditional expression is constant |
39 | #endif |
40 | |
41 | namespace boost { namespace accumulators |
42 | { |
43 | |
44 | /////////////////////////////////////////////////////////////////////////////// |
45 | // threshold_probability and threshold named parameters |
46 | // |
47 | BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value) |
48 | BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability) |
49 | |
50 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value) |
51 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability) |
52 | |
53 | namespace impl |
54 | { |
55 | /////////////////////////////////////////////////////////////////////////////// |
56 | // peaks_over_threshold_impl |
57 | // works with an explicit threshold value and does not depend on order statistics |
58 | /** |
59 | @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation |
60 | |
61 | According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of |
62 | the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$ |
63 | may be approximated by a generalized Pareto distribution |
64 | \f[ |
65 | G_{\xi,\beta}(x) = |
66 | \left\{ |
67 | \begin{array}{ll} |
68 | \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\ |
69 | \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0, |
70 | \end{array} |
71 | \right. |
72 | \f] |
73 | with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf. |
74 | Hosking and Wallis (1987), |
75 | \f[ |
76 | \begin{array}{lll} |
77 | \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\ |
78 | \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right], |
79 | \end{array} |
80 | \f] |
81 | \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over |
82 | the threshold \f$u\f$. Equivalently, the distribution function |
83 | \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by |
84 | \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$ |
85 | can be written as |
86 | \f[ |
87 | F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u) |
88 | \f] |
89 | and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function |
90 | \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by |
91 | \f[ |
92 | \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u). |
93 | \f] |
94 | It can be shown that \f$\widehat{F}(x)\f$ is a generalized |
95 | Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$ |
96 | and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$, |
97 | one obtains an estimator for the \f$\alpha\f$-quantile, |
98 | \f[ |
99 | \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right], |
100 | \f] |
101 | and similarly an estimator for the (coherent) tail mean, |
102 | \f[ |
103 | \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, |
104 | \f] |
105 | cf. McNeil and Frey (2000). |
106 | |
107 | Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the |
108 | \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define |
109 | the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are |
110 | computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the |
111 | correct result. |
112 | |
113 | For further details, see |
114 | |
115 | J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution, |
116 | Technometrics, Volume 29, 1987, p. 339-349 |
117 | |
118 | A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series: |
119 | an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300 |
120 | |
121 | @param quantile_probability |
122 | @param pot_threshold_value |
123 | */ |
124 | template<typename Sample, typename LeftRight> |
125 | struct peaks_over_threshold_impl |
126 | : accumulator_base |
127 | { |
128 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
129 | // for boost::result_of |
130 | typedef boost::tuple<float_type, float_type, float_type> result_type; |
131 | // for left tail fitting, mirror the extreme values |
132 | typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; |
133 | |
134 | template<typename Args> |
135 | peaks_over_threshold_impl(Args const &args) |
136 | : Nu_(0) |
137 | , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
138 | , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
139 | , threshold_(sign::value * args[pot_threshold_value]) |
140 | , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.)) |
141 | , is_dirty_(true) |
142 | { |
143 | } |
144 | |
145 | template<typename Args> |
146 | void operator ()(Args const &args) |
147 | { |
148 | this->is_dirty_ = true; |
149 | |
150 | if (sign::value * args[sample] > this->threshold_) |
151 | { |
152 | this->mu_ += args[sample]; |
153 | this->sigma2_ += args[sample] * args[sample]; |
154 | ++this->Nu_; |
155 | } |
156 | } |
157 | |
158 | template<typename Args> |
159 | result_type result(Args const &args) const |
160 | { |
161 | if (this->is_dirty_) |
162 | { |
163 | this->is_dirty_ = false; |
164 | |
165 | std::size_t cnt = count(args); |
166 | |
167 | this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_); |
168 | this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_); |
169 | this->sigma2_ -= this->mu_ * this->mu_; |
170 | |
171 | float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt); |
172 | |
173 | float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); |
174 | float_type xi_hat = 0.5 * ( 1. - tmp ); |
175 | float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); |
176 | float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); |
177 | float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; |
178 | this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); |
179 | } |
180 | |
181 | return this->fit_parameters_; |
182 | } |
183 | |
184 | // make this accumulator serializeable |
185 | // TODO: do we need to split to load/save and verify that threshold did not change? |
186 | template<class Archive> |
187 | void serialize(Archive & ar, const unsigned int file_version) |
188 | { |
189 | ar & Nu_; |
190 | ar & mu_; |
191 | ar & sigma2_; |
192 | ar & threshold_; |
193 | ar & get<0>(fit_parameters_); |
194 | ar & get<1>(fit_parameters_); |
195 | ar & get<2>(fit_parameters_); |
196 | ar & is_dirty_; |
197 | } |
198 | |
199 | private: |
200 | std::size_t Nu_; // number of samples larger than threshold |
201 | mutable float_type mu_; // mean of Nu_ largest samples |
202 | mutable float_type sigma2_; // variance of Nu_ largest samples |
203 | float_type threshold_; |
204 | mutable result_type fit_parameters_; // boost::tuple that stores fit parameters |
205 | mutable bool is_dirty_; |
206 | }; |
207 | |
208 | /////////////////////////////////////////////////////////////////////////////// |
209 | // peaks_over_threshold_prob_impl |
210 | // determines threshold from a given threshold probability using order statistics |
211 | /** |
212 | @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation |
213 | |
214 | @sa peaks_over_threshold_impl |
215 | |
216 | @param quantile_probability |
217 | @param pot_threshold_probability |
218 | */ |
219 | template<typename Sample, typename LeftRight> |
220 | struct peaks_over_threshold_prob_impl |
221 | : accumulator_base |
222 | { |
223 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
224 | // for boost::result_of |
225 | typedef boost::tuple<float_type, float_type, float_type> result_type; |
226 | // for left tail fitting, mirror the extreme values |
227 | typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; |
228 | |
229 | template<typename Args> |
230 | peaks_over_threshold_prob_impl(Args const &args) |
231 | : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
232 | , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
233 | , threshold_probability_(args[pot_threshold_probability]) |
234 | , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.)) |
235 | , is_dirty_(true) |
236 | { |
237 | } |
238 | |
239 | void operator ()(dont_care) |
240 | { |
241 | this->is_dirty_ = true; |
242 | } |
243 | |
244 | template<typename Args> |
245 | result_type result(Args const &args) const |
246 | { |
247 | if (this->is_dirty_) |
248 | { |
249 | this->is_dirty_ = false; |
250 | |
251 | std::size_t cnt = count(args); |
252 | |
253 | // the n'th cached sample provides an approximate threshold value u |
254 | std::size_t n = static_cast<std::size_t>( |
255 | std::ceil( |
256 | cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ) |
257 | ) |
258 | ); |
259 | |
260 | // If n is in a valid range, return result, otherwise return NaN or throw exception |
261 | if ( n >= static_cast<std::size_t>(tail(args).size())) |
262 | { |
263 | if (std::numeric_limits<float_type>::has_quiet_NaN) |
264 | { |
265 | return boost::make_tuple( |
266 | std::numeric_limits<float_type>::quiet_NaN() |
267 | , std::numeric_limits<float_type>::quiet_NaN() |
268 | , std::numeric_limits<float_type>::quiet_NaN() |
269 | ); |
270 | } |
271 | else |
272 | { |
273 | std::ostringstream msg; |
274 | msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")" ; |
275 | boost::throw_exception(e: std::runtime_error(msg.str())); |
276 | return boost::make_tuple(Sample(0), Sample(0), Sample(0)); |
277 | } |
278 | } |
279 | else |
280 | { |
281 | float_type u = *(tail(args).begin() + n - 1) * sign::value; |
282 | |
283 | // compute mean and variance of samples above/under threshold value u |
284 | for (std::size_t i = 0; i < n; ++i) |
285 | { |
286 | mu_ += *(tail(args).begin() + i); |
287 | sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i)); |
288 | } |
289 | |
290 | this->mu_ = sign::value * numeric::fdiv(this->mu_, n); |
291 | this->sigma2_ = numeric::fdiv(this->sigma2_, n); |
292 | this->sigma2_ -= this->mu_ * this->mu_; |
293 | |
294 | if (is_same<LeftRight, left>::value) |
295 | this->threshold_probability_ = 1. - this->threshold_probability_; |
296 | |
297 | float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); |
298 | float_type xi_hat = 0.5 * ( 1. - tmp ); |
299 | float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); |
300 | float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); |
301 | float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; |
302 | this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); |
303 | } |
304 | } |
305 | |
306 | return this->fit_parameters_; |
307 | } |
308 | |
309 | // make this accumulator serializeable |
310 | // TODO: do we need to split to load/save and verify that threshold did not change? |
311 | template<class Archive> |
312 | void serialize(Archive & ar, const unsigned int file_version) |
313 | { |
314 | ar & mu_; |
315 | ar & sigma2_; |
316 | ar & threshold_probability_; |
317 | ar & get<0>(fit_parameters_); |
318 | ar & get<1>(fit_parameters_); |
319 | ar & get<2>(fit_parameters_); |
320 | ar & is_dirty_; |
321 | } |
322 | |
323 | private: |
324 | mutable float_type mu_; // mean of samples above threshold u |
325 | mutable float_type sigma2_; // variance of samples above threshold u |
326 | mutable float_type threshold_probability_; |
327 | mutable result_type fit_parameters_; // boost::tuple that stores fit parameters |
328 | mutable bool is_dirty_; |
329 | }; |
330 | |
331 | } // namespace impl |
332 | |
333 | /////////////////////////////////////////////////////////////////////////////// |
334 | // tag::peaks_over_threshold |
335 | // |
336 | namespace tag |
337 | { |
338 | template<typename LeftRight> |
339 | struct peaks_over_threshold |
340 | : depends_on<count> |
341 | , pot_threshold_value |
342 | { |
343 | /// INTERNAL ONLY |
344 | /// |
345 | typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl; |
346 | }; |
347 | |
348 | template<typename LeftRight> |
349 | struct peaks_over_threshold_prob |
350 | : depends_on<count, tail<LeftRight> > |
351 | , pot_threshold_probability |
352 | { |
353 | /// INTERNAL ONLY |
354 | /// |
355 | typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl; |
356 | }; |
357 | |
358 | struct abstract_peaks_over_threshold |
359 | : depends_on<> |
360 | { |
361 | }; |
362 | } |
363 | |
364 | /////////////////////////////////////////////////////////////////////////////// |
365 | // extract::peaks_over_threshold |
366 | // |
367 | namespace extract |
368 | { |
369 | extractor<tag::abstract_peaks_over_threshold> const = {}; |
370 | |
371 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold) |
372 | } |
373 | |
374 | using extract::peaks_over_threshold; |
375 | |
376 | // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight> |
377 | template<typename LeftRight> |
378 | struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)> |
379 | { |
380 | typedef tag::peaks_over_threshold<LeftRight> type; |
381 | }; |
382 | |
383 | // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight> |
384 | template<typename LeftRight> |
385 | struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)> |
386 | { |
387 | typedef tag::peaks_over_threshold_prob<LeftRight> type; |
388 | }; |
389 | |
390 | template<typename LeftRight> |
391 | struct feature_of<tag::peaks_over_threshold<LeftRight> > |
392 | : feature_of<tag::abstract_peaks_over_threshold> |
393 | { |
394 | }; |
395 | |
396 | template<typename LeftRight> |
397 | struct feature_of<tag::peaks_over_threshold_prob<LeftRight> > |
398 | : feature_of<tag::abstract_peaks_over_threshold> |
399 | { |
400 | }; |
401 | |
402 | // So that peaks_over_threshold can be automatically substituted |
403 | // with weighted_peaks_over_threshold when the weight parameter is non-void. |
404 | template<typename LeftRight> |
405 | struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> > |
406 | { |
407 | typedef tag::weighted_peaks_over_threshold<LeftRight> type; |
408 | }; |
409 | |
410 | template<typename LeftRight> |
411 | struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> > |
412 | : feature_of<tag::peaks_over_threshold<LeftRight> > |
413 | {}; |
414 | |
415 | // So that peaks_over_threshold_prob can be automatically substituted |
416 | // with weighted_peaks_over_threshold_prob when the weight parameter is non-void. |
417 | template<typename LeftRight> |
418 | struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> > |
419 | { |
420 | typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type; |
421 | }; |
422 | |
423 | template<typename LeftRight> |
424 | struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> > |
425 | : feature_of<tag::peaks_over_threshold_prob<LeftRight> > |
426 | {}; |
427 | |
428 | }} // namespace boost::accumulators |
429 | |
430 | #ifdef _MSC_VER |
431 | # pragma warning(pop) |
432 | #endif |
433 | |
434 | #endif |
435 | |