| 1 | /////////////////////////////////////////////////////////////////////////////// |
| 2 | // peaks_over_threshold.hpp |
| 3 | // |
| 4 | // Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost |
| 5 | // Software License, Version 1.0. (See accompanying file |
| 6 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
| 7 | |
| 8 | #ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 |
| 9 | #define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006 |
| 10 | |
| 11 | #include <vector> |
| 12 | #include <limits> |
| 13 | #include <numeric> |
| 14 | #include <functional> |
| 15 | #include <boost/config/no_tr1/cmath.hpp> // pow |
| 16 | #include <sstream> // stringstream |
| 17 | #include <stdexcept> // runtime_error |
| 18 | #include <boost/throw_exception.hpp> |
| 19 | #include <boost/range.hpp> |
| 20 | #include <boost/mpl/if.hpp> |
| 21 | #include <boost/mpl/int.hpp> |
| 22 | #include <boost/mpl/placeholders.hpp> |
| 23 | #include <boost/parameter/keyword.hpp> |
| 24 | #include <boost/tuple/tuple.hpp> |
| 25 | #include <boost/accumulators/accumulators_fwd.hpp> |
| 26 | #include <boost/accumulators/framework/accumulator_base.hpp> |
| 27 | #include <boost/accumulators/framework/extractor.hpp> |
| 28 | #include <boost/accumulators/numeric/functional.hpp> |
| 29 | #include <boost/accumulators/framework/parameters/sample.hpp> |
| 30 | #include <boost/accumulators/framework/depends_on.hpp> |
| 31 | #include <boost/accumulators/statistics_fwd.hpp> |
| 32 | #include <boost/accumulators/statistics/parameters/quantile_probability.hpp> |
| 33 | #include <boost/accumulators/statistics/count.hpp> |
| 34 | #include <boost/accumulators/statistics/tail.hpp> |
| 35 | |
| 36 | #ifdef _MSC_VER |
| 37 | # pragma warning(push) |
| 38 | # pragma warning(disable: 4127) // conditional expression is constant |
| 39 | #endif |
| 40 | |
| 41 | namespace boost { namespace accumulators |
| 42 | { |
| 43 | |
| 44 | /////////////////////////////////////////////////////////////////////////////// |
| 45 | // threshold_probability and threshold named parameters |
| 46 | // |
| 47 | BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value) |
| 48 | BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability) |
| 49 | |
| 50 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value) |
| 51 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability) |
| 52 | |
| 53 | namespace impl |
| 54 | { |
| 55 | /////////////////////////////////////////////////////////////////////////////// |
| 56 | // peaks_over_threshold_impl |
| 57 | // works with an explicit threshold value and does not depend on order statistics |
| 58 | /** |
| 59 | @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation |
| 60 | |
| 61 | According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of |
| 62 | the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$ |
| 63 | may be approximated by a generalized Pareto distribution |
| 64 | \f[ |
| 65 | G_{\xi,\beta}(x) = |
| 66 | \left\{ |
| 67 | \begin{array}{ll} |
| 68 | \beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\ |
| 69 | \beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0, |
| 70 | \end{array} |
| 71 | \right. |
| 72 | \f] |
| 73 | with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf. |
| 74 | Hosking and Wallis (1987), |
| 75 | \f[ |
| 76 | \begin{array}{lll} |
| 77 | \hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\ |
| 78 | \hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right], |
| 79 | \end{array} |
| 80 | \f] |
| 81 | \f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over |
| 82 | the threshold \f$u\f$. Equivalently, the distribution function |
| 83 | \f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by |
| 84 | \f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$ |
| 85 | can be written as |
| 86 | \f[ |
| 87 | F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u) |
| 88 | \f] |
| 89 | and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function |
| 90 | \f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by |
| 91 | \f[ |
| 92 | \widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u). |
| 93 | \f] |
| 94 | It can be shown that \f$\widehat{F}(x)\f$ is a generalized |
| 95 | Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$ |
| 96 | and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$, |
| 97 | one obtains an estimator for the \f$\alpha\f$-quantile, |
| 98 | \f[ |
| 99 | \hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right], |
| 100 | \f] |
| 101 | and similarly an estimator for the (coherent) tail mean, |
| 102 | \f[ |
| 103 | \widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi}, |
| 104 | \f] |
| 105 | cf. McNeil and Frey (2000). |
| 106 | |
| 107 | Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the |
| 108 | \f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define |
| 109 | the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are |
| 110 | computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the |
| 111 | correct result. |
| 112 | |
| 113 | For further details, see |
| 114 | |
| 115 | J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution, |
| 116 | Technometrics, Volume 29, 1987, p. 339-349 |
| 117 | |
| 118 | A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series: |
| 119 | an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300 |
| 120 | |
| 121 | @param quantile_probability |
| 122 | @param pot_threshold_value |
| 123 | */ |
| 124 | template<typename Sample, typename LeftRight> |
| 125 | struct peaks_over_threshold_impl |
| 126 | : accumulator_base |
| 127 | { |
| 128 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
| 129 | // for boost::result_of |
| 130 | typedef boost::tuple<float_type, float_type, float_type> result_type; |
| 131 | // for left tail fitting, mirror the extreme values |
| 132 | typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; |
| 133 | |
| 134 | template<typename Args> |
| 135 | peaks_over_threshold_impl(Args const &args) |
| 136 | : Nu_(0) |
| 137 | , mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
| 138 | , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
| 139 | , threshold_(sign::value * args[pot_threshold_value]) |
| 140 | , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.)) |
| 141 | , is_dirty_(true) |
| 142 | { |
| 143 | } |
| 144 | |
| 145 | template<typename Args> |
| 146 | void operator ()(Args const &args) |
| 147 | { |
| 148 | this->is_dirty_ = true; |
| 149 | |
| 150 | if (sign::value * args[sample] > this->threshold_) |
| 151 | { |
| 152 | this->mu_ += args[sample]; |
| 153 | this->sigma2_ += args[sample] * args[sample]; |
| 154 | ++this->Nu_; |
| 155 | } |
| 156 | } |
| 157 | |
| 158 | template<typename Args> |
| 159 | result_type result(Args const &args) const |
| 160 | { |
| 161 | if (this->is_dirty_) |
| 162 | { |
| 163 | this->is_dirty_ = false; |
| 164 | |
| 165 | std::size_t cnt = count(args); |
| 166 | |
| 167 | this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_); |
| 168 | this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_); |
| 169 | this->sigma2_ -= this->mu_ * this->mu_; |
| 170 | |
| 171 | float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt); |
| 172 | |
| 173 | float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )*( this->mu_ - this->threshold_ ), this->sigma2_); |
| 174 | float_type xi_hat = 0.5 * ( 1. - tmp ); |
| 175 | float_type beta_hat = 0.5 * ( this->mu_ - this->threshold_ ) * ( 1. + tmp ); |
| 176 | float_type beta_bar = beta_hat * std::pow(1. - threshold_probability, xi_hat); |
| 177 | float_type u_bar = this->threshold_ - beta_bar * ( std::pow(1. - threshold_probability, -xi_hat) - 1.)/xi_hat; |
| 178 | this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); |
| 179 | } |
| 180 | |
| 181 | return this->fit_parameters_; |
| 182 | } |
| 183 | |
| 184 | // make this accumulator serializeable |
| 185 | // TODO: do we need to split to load/save and verify that threshold did not change? |
| 186 | template<class Archive> |
| 187 | void serialize(Archive & ar, const unsigned int file_version) |
| 188 | { |
| 189 | ar & Nu_; |
| 190 | ar & mu_; |
| 191 | ar & sigma2_; |
| 192 | ar & threshold_; |
| 193 | ar & get<0>(fit_parameters_); |
| 194 | ar & get<1>(fit_parameters_); |
| 195 | ar & get<2>(fit_parameters_); |
| 196 | ar & is_dirty_; |
| 197 | } |
| 198 | |
| 199 | private: |
| 200 | std::size_t Nu_; // number of samples larger than threshold |
| 201 | mutable float_type mu_; // mean of Nu_ largest samples |
| 202 | mutable float_type sigma2_; // variance of Nu_ largest samples |
| 203 | float_type threshold_; |
| 204 | mutable result_type fit_parameters_; // boost::tuple that stores fit parameters |
| 205 | mutable bool is_dirty_; |
| 206 | }; |
| 207 | |
| 208 | /////////////////////////////////////////////////////////////////////////////// |
| 209 | // peaks_over_threshold_prob_impl |
| 210 | // determines threshold from a given threshold probability using order statistics |
| 211 | /** |
| 212 | @brief Peaks over Threshold Method for Quantile and Tail Mean Estimation |
| 213 | |
| 214 | @sa peaks_over_threshold_impl |
| 215 | |
| 216 | @param quantile_probability |
| 217 | @param pot_threshold_probability |
| 218 | */ |
| 219 | template<typename Sample, typename LeftRight> |
| 220 | struct peaks_over_threshold_prob_impl |
| 221 | : accumulator_base |
| 222 | { |
| 223 | typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type; |
| 224 | // for boost::result_of |
| 225 | typedef boost::tuple<float_type, float_type, float_type> result_type; |
| 226 | // for left tail fitting, mirror the extreme values |
| 227 | typedef mpl::int_<is_same<LeftRight, left>::value ? -1 : 1> sign; |
| 228 | |
| 229 | template<typename Args> |
| 230 | peaks_over_threshold_prob_impl(Args const &args) |
| 231 | : mu_(sign::value * numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
| 232 | , sigma2_(numeric::fdiv(args[sample | Sample()], (std::size_t)1)) |
| 233 | , threshold_probability_(args[pot_threshold_probability]) |
| 234 | , fit_parameters_(boost::make_tuple(t0: 0., t1: 0., t2: 0.)) |
| 235 | , is_dirty_(true) |
| 236 | { |
| 237 | } |
| 238 | |
| 239 | void operator ()(dont_care) |
| 240 | { |
| 241 | this->is_dirty_ = true; |
| 242 | } |
| 243 | |
| 244 | template<typename Args> |
| 245 | result_type result(Args const &args) const |
| 246 | { |
| 247 | if (this->is_dirty_) |
| 248 | { |
| 249 | this->is_dirty_ = false; |
| 250 | |
| 251 | std::size_t cnt = count(args); |
| 252 | |
| 253 | // the n'th cached sample provides an approximate threshold value u |
| 254 | std::size_t n = static_cast<std::size_t>( |
| 255 | std::ceil( |
| 256 | cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : 1. - this->threshold_probability_ ) |
| 257 | ) |
| 258 | ); |
| 259 | |
| 260 | // If n is in a valid range, return result, otherwise return NaN or throw exception |
| 261 | if ( n >= static_cast<std::size_t>(tail(args).size())) |
| 262 | { |
| 263 | if (std::numeric_limits<float_type>::has_quiet_NaN) |
| 264 | { |
| 265 | return boost::make_tuple( |
| 266 | std::numeric_limits<float_type>::quiet_NaN() |
| 267 | , std::numeric_limits<float_type>::quiet_NaN() |
| 268 | , std::numeric_limits<float_type>::quiet_NaN() |
| 269 | ); |
| 270 | } |
| 271 | else |
| 272 | { |
| 273 | std::ostringstream msg; |
| 274 | msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")" ; |
| 275 | boost::throw_exception(e: std::runtime_error(msg.str())); |
| 276 | return boost::make_tuple(Sample(0), Sample(0), Sample(0)); |
| 277 | } |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | float_type u = *(tail(args).begin() + n - 1) * sign::value; |
| 282 | |
| 283 | // compute mean and variance of samples above/under threshold value u |
| 284 | for (std::size_t i = 0; i < n; ++i) |
| 285 | { |
| 286 | mu_ += *(tail(args).begin() + i); |
| 287 | sigma2_ += *(tail(args).begin() + i) * (*(tail(args).begin() + i)); |
| 288 | } |
| 289 | |
| 290 | this->mu_ = sign::value * numeric::fdiv(this->mu_, n); |
| 291 | this->sigma2_ = numeric::fdiv(this->sigma2_, n); |
| 292 | this->sigma2_ -= this->mu_ * this->mu_; |
| 293 | |
| 294 | if (is_same<LeftRight, left>::value) |
| 295 | this->threshold_probability_ = 1. - this->threshold_probability_; |
| 296 | |
| 297 | float_type tmp = numeric::fdiv(( this->mu_ - u )*( this->mu_ - u ), this->sigma2_); |
| 298 | float_type xi_hat = 0.5 * ( 1. - tmp ); |
| 299 | float_type beta_hat = 0.5 * ( this->mu_ - u ) * ( 1. + tmp ); |
| 300 | float_type beta_bar = beta_hat * std::pow(1. - threshold_probability_, xi_hat); |
| 301 | float_type u_bar = u - beta_bar * ( std::pow(1. - threshold_probability_, -xi_hat) - 1.)/xi_hat; |
| 302 | this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat); |
| 303 | } |
| 304 | } |
| 305 | |
| 306 | return this->fit_parameters_; |
| 307 | } |
| 308 | |
| 309 | // make this accumulator serializeable |
| 310 | // TODO: do we need to split to load/save and verify that threshold did not change? |
| 311 | template<class Archive> |
| 312 | void serialize(Archive & ar, const unsigned int file_version) |
| 313 | { |
| 314 | ar & mu_; |
| 315 | ar & sigma2_; |
| 316 | ar & threshold_probability_; |
| 317 | ar & get<0>(fit_parameters_); |
| 318 | ar & get<1>(fit_parameters_); |
| 319 | ar & get<2>(fit_parameters_); |
| 320 | ar & is_dirty_; |
| 321 | } |
| 322 | |
| 323 | private: |
| 324 | mutable float_type mu_; // mean of samples above threshold u |
| 325 | mutable float_type sigma2_; // variance of samples above threshold u |
| 326 | mutable float_type threshold_probability_; |
| 327 | mutable result_type fit_parameters_; // boost::tuple that stores fit parameters |
| 328 | mutable bool is_dirty_; |
| 329 | }; |
| 330 | |
| 331 | } // namespace impl |
| 332 | |
| 333 | /////////////////////////////////////////////////////////////////////////////// |
| 334 | // tag::peaks_over_threshold |
| 335 | // |
| 336 | namespace tag |
| 337 | { |
| 338 | template<typename LeftRight> |
| 339 | struct peaks_over_threshold |
| 340 | : depends_on<count> |
| 341 | , pot_threshold_value |
| 342 | { |
| 343 | /// INTERNAL ONLY |
| 344 | /// |
| 345 | typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl; |
| 346 | }; |
| 347 | |
| 348 | template<typename LeftRight> |
| 349 | struct peaks_over_threshold_prob |
| 350 | : depends_on<count, tail<LeftRight> > |
| 351 | , pot_threshold_probability |
| 352 | { |
| 353 | /// INTERNAL ONLY |
| 354 | /// |
| 355 | typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl; |
| 356 | }; |
| 357 | |
| 358 | struct abstract_peaks_over_threshold |
| 359 | : depends_on<> |
| 360 | { |
| 361 | }; |
| 362 | } |
| 363 | |
| 364 | /////////////////////////////////////////////////////////////////////////////// |
| 365 | // extract::peaks_over_threshold |
| 366 | // |
| 367 | namespace extract |
| 368 | { |
| 369 | extractor<tag::abstract_peaks_over_threshold> const = {}; |
| 370 | |
| 371 | BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold) |
| 372 | } |
| 373 | |
| 374 | using extract::peaks_over_threshold; |
| 375 | |
| 376 | // peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight> |
| 377 | template<typename LeftRight> |
| 378 | struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)> |
| 379 | { |
| 380 | typedef tag::peaks_over_threshold<LeftRight> type; |
| 381 | }; |
| 382 | |
| 383 | // peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight> |
| 384 | template<typename LeftRight> |
| 385 | struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)> |
| 386 | { |
| 387 | typedef tag::peaks_over_threshold_prob<LeftRight> type; |
| 388 | }; |
| 389 | |
| 390 | template<typename LeftRight> |
| 391 | struct feature_of<tag::peaks_over_threshold<LeftRight> > |
| 392 | : feature_of<tag::abstract_peaks_over_threshold> |
| 393 | { |
| 394 | }; |
| 395 | |
| 396 | template<typename LeftRight> |
| 397 | struct feature_of<tag::peaks_over_threshold_prob<LeftRight> > |
| 398 | : feature_of<tag::abstract_peaks_over_threshold> |
| 399 | { |
| 400 | }; |
| 401 | |
| 402 | // So that peaks_over_threshold can be automatically substituted |
| 403 | // with weighted_peaks_over_threshold when the weight parameter is non-void. |
| 404 | template<typename LeftRight> |
| 405 | struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> > |
| 406 | { |
| 407 | typedef tag::weighted_peaks_over_threshold<LeftRight> type; |
| 408 | }; |
| 409 | |
| 410 | template<typename LeftRight> |
| 411 | struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> > |
| 412 | : feature_of<tag::peaks_over_threshold<LeftRight> > |
| 413 | {}; |
| 414 | |
| 415 | // So that peaks_over_threshold_prob can be automatically substituted |
| 416 | // with weighted_peaks_over_threshold_prob when the weight parameter is non-void. |
| 417 | template<typename LeftRight> |
| 418 | struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> > |
| 419 | { |
| 420 | typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type; |
| 421 | }; |
| 422 | |
| 423 | template<typename LeftRight> |
| 424 | struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> > |
| 425 | : feature_of<tag::peaks_over_threshold_prob<LeftRight> > |
| 426 | {}; |
| 427 | |
| 428 | }} // namespace boost::accumulators |
| 429 | |
| 430 | #ifdef _MSC_VER |
| 431 | # pragma warning(pop) |
| 432 | #endif |
| 433 | |
| 434 | #endif |
| 435 | |