peaks_over_threshold.hpp source code [boost/libs/accumulators/include/boost/accumulators/statistics/peaks_over_threshold.hpp]

1	///////////////////////////////////////////////////////////////////////////////
2	// peaks_over_threshold.hpp
3	//
4	// Copyright 2006 Daniel Egloff, Olivier Gygi. Distributed under the Boost
5	// Software License, Version 1.0. (See accompanying file
6	// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
7
8	#ifndef BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
9	#define BOOST_ACCUMULATORS_STATISTICS_PEAKS_OVER_THRESHOLD_HPP_DE_01_01_2006
10
11	#include <vector>
12	#include <limits>
13	#include <numeric>
14	#include <functional>
15	#include <boost/config/no_tr1/cmath.hpp> // pow
16	#include <sstream> // stringstream
17	#include <stdexcept> // runtime_error
18	#include <boost/throw_exception.hpp>
19	#include <boost/range.hpp>
20	#include <boost/mpl/if.hpp>
21	#include <boost/mpl/int.hpp>
22	#include <boost/mpl/placeholders.hpp>
23	#include <boost/parameter/keyword.hpp>
24	#include <boost/tuple/tuple.hpp>
25	#include <boost/accumulators/accumulators_fwd.hpp>
26	#include <boost/accumulators/framework/accumulator_base.hpp>
27	#include <boost/accumulators/framework/extractor.hpp>
28	#include <boost/accumulators/numeric/functional.hpp>
29	#include <boost/accumulators/framework/parameters/sample.hpp>
30	#include <boost/accumulators/framework/depends_on.hpp>
31	#include <boost/accumulators/statistics_fwd.hpp>
32	#include <boost/accumulators/statistics/parameters/quantile_probability.hpp>
33	#include <boost/accumulators/statistics/count.hpp>
34	#include <boost/accumulators/statistics/tail.hpp>
35
36	#ifdef _MSC_VER
37	# pragma warning(push)
38	# pragma warning(disable: 4127) // conditional expression is constant
39	#endif
40
41	namespace boost { namespace accumulators
42	{
43
44	///////////////////////////////////////////////////////////////////////////////
45	// threshold_probability and threshold named parameters
46	//
47	BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_value, threshold_value)
48	BOOST_PARAMETER_NESTED_KEYWORD(tag, pot_threshold_probability, threshold_probability)
49
50	BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_value)
51	BOOST_ACCUMULATORS_IGNORE_GLOBAL(pot_threshold_probability)
52
53	namespace impl
54	{
55	///////////////////////////////////////////////////////////////////////////////
56	// peaks_over_threshold_impl
57	// works with an explicit threshold value and does not depend on order statistics
58	/**
59	@brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
60
61	According to the theorem of Pickands-Balkema-de Haan, the distribution function \f$F_u(x)\f$ of
62	the excesses \f$x\f$ over some sufficiently high threshold \f$u\f$ of a distribution function \f$F(x)\f$
63	may be approximated by a generalized Pareto distribution
64	\f[
65	G_{\xi,\beta}(x) =
66	\left\{
67	\begin{array}{ll}
68	\beta^{-1}\left(1+\frac{\xi x}{\beta}\right)^{-1/\xi-1} & \textrm{if }\xi\neq0\\
69	\beta^{-1}\exp\left(-\frac{x}{\beta}\right) & \textrm{if }\xi=0,
70	\end{array}
71	\right.
72	\f]
73	with suitable parameters \f$\xi\f$ and \f$\beta\f$ that can be estimated, e.g., with the method of moments, cf.
74	Hosking and Wallis (1987),
75	\f[
76	\begin{array}{lll}
77	\hat{\xi} & = & \frac{1}{2}\left[1-\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}\right]\\
78	\hat{\beta} & = & \frac{\hat{\mu}-u}{2}\left[\frac{(\hat{\mu}-u)^2}{\hat{\sigma}^2}+1\right],
79	\end{array}
80	\f]
81	\f$\hat{\mu}\f$ and \f$\hat{\sigma}^2\f$ being the empirical mean and variance of the samples over
82	the threshold \f$u\f$. Equivalently, the distribution function
83	\f$F_u(x-u)\f$ of the exceedances \f$x-u\f$ can be approximated by
84	\f$G_{\xi,\beta}(x-u)=G_{\xi,\beta,u}(x)\f$. Since for \f$x\geq u\f$ the distribution function \f$F(x)\f$
85	can be written as
86	\f[
87	F(x) = [1 - \P(X \leq u)]F_u(x - u) + \P(X \leq u)
88	\f]
89	and the probability \f$\P(X \leq u)\f$ can be approximated by the empirical distribution function
90	\f$F_n(u)\f$ evaluated at \f$u\f$, an estimator of \f$F(x)\f$ is given by
91	\f[
92	\widehat{F}(x) = [1 - F_n(u)]G_{\xi,\beta,u}(x) + F_n(u).
93	\f]
94	It can be shown that \f$\widehat{F}(x)\f$ is a generalized
95	Pareto distribution \f$G_{\xi,\bar{\beta},\bar{u}}(x)\f$ with \f$\bar{\beta}=\beta[1-F_n(u)]^{\xi}\f$
96	and \f$\bar{u}=u-\bar{\beta}\left\{[1-F_n(u)]^{-\xi}-1\right\}/\xi\f$. By inverting \f$\widehat{F}(x)\f$,
97	one obtains an estimator for the \f$\alpha\f$-quantile,
98	\f[
99	\hat{q}_{\alpha} = \bar{u} + \frac{\bar{\beta}}{\xi}\left[(1-\alpha)^{-\xi}-1\right],
100	\f]
101	and similarly an estimator for the (coherent) tail mean,
102	\f[
103	\widehat{CTM}_{\alpha} = \hat{q}_{\alpha} - \frac{\bar{\beta}}{\xi-1}(1-\alpha)^{-\xi},
104	\f]
105	cf. McNeil and Frey (2000).
106
107	Note that in case extreme values of the left tail are fitted, the distribution is mirrored with respect to the
108	\f$y\f$ axis such that the left tail can be treated as a right tail. The computed fit parameters thus define
109	the Pareto distribution that fits the mirrored left tail. When quantities like a quantile or a tail mean are
110	computed using the fit parameters obtained from the mirrored data, the result is mirrored back, yielding the
111	correct result.
112
113	For further details, see
114
115	J. R. M. Hosking and J. R. Wallis, Parameter and quantile estimation for the generalized Pareto distribution,
116	Technometrics, Volume 29, 1987, p. 339-349
117
118	A. J. McNeil and R. Frey, Estimation of Tail-Related Risk Measures for Heteroscedastic Financial Time Series:
119	an Extreme Value Approach, Journal of Empirical Finance, Volume 7, 2000, p. 271-300
120
121	@param quantile_probability
122	@param pot_threshold_value
123	*/
124	template<typename Sample, typename LeftRight>
125	struct peaks_over_threshold_impl
126	: accumulator_base
127	{
128	typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
129	// for boost::result_of
130	typedef boost::tuple<float_type, float_type, float_type> result_type;
131	// for left tail fitting, mirror the extreme values
132	typedef mpl::int_<is_same<LeftRight, left>::value ? -`1` : `1`> sign;
133
134	template<typename Args>
135	peaks_over_threshold_impl(Args const &args)
136	: Nu_(`0`)
137	, mu_(sign::value * numeric::fdiv(args[sample \| Sample()], (std::size_t)`1`))
138	, sigma2_(numeric::fdiv(args[sample \| Sample()], (std::size_t)`1`))
139	, threshold_(sign::value * args[pot_threshold_value])
140	, fit_parameters_(boost::make_tuple(t0: `0.`, t1: `0.`, t2: `0.`))
141	, is_dirty_(true)
142	{
143	}
144
145	template<typename Args>
146	void operator ()(Args const &args)
147	{
148	this->is_dirty_ = true;
149
150	if (sign::value * args[sample] > this->threshold_)
151	{
152	this->mu_ += args[sample];
153	this->sigma2_ += args[sample] * args[sample];
154	++this->Nu_;
155	}
156	}
157
158	template<typename Args>
159	result_type result(Args const &args) const
160	{
161	if (this->is_dirty_)
162	{
163	this->is_dirty_ = false;
164
165	std::size_t cnt = count(args);
166
167	this->mu_ = sign::value * numeric::fdiv(this->mu_, this->Nu_);
168	this->sigma2_ = numeric::fdiv(this->sigma2_, this->Nu_);
169	this->sigma2_ -= this->mu_ * this->mu_;
170
171	float_type threshold_probability = numeric::fdiv(cnt - this->Nu_, cnt);
172
173	float_type tmp = numeric::fdiv(( this->mu_ - this->threshold_ )( this->mu_ - this->threshold_ ), this*->sigma2_);
174	float_type xi_hat = `0.5` * ( `1.` - tmp );
175	float_type beta_hat = `0.5` * ( this->mu_ - this->threshold_ ) * ( `1.` + tmp );
176	float_type beta_bar = beta_hat * std::pow(`1.` - threshold_probability, xi_hat);
177	float_type u_bar = this->threshold_ - beta_bar * ( std::pow(`1.` - threshold_probability, -xi_hat) - `1.`)/xi_hat;
178	this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
179	}
180
181	return this->fit_parameters_;
182	}
183
184	// make this accumulator serializeable
185	// TODO: do we need to split to load/save and verify that threshold did not change?
186	template<class Archive>
187	void serialize(Archive & ar, const unsigned int file_version)
188	{
189	ar & Nu_;
190	ar & mu_;
191	ar & sigma2_;
192	ar & threshold_;
193	ar & get<`0`>(fit_parameters_);
194	ar & get<`1`>(fit_parameters_);
195	ar & get<`2`>(fit_parameters_);
196	ar & is_dirty_;
197	}
198
199	private:
200	std::size_t Nu_; // number of samples larger than threshold
201	mutable float_type mu_; // mean of Nu_ largest samples
202	mutable float_type sigma2_; // variance of Nu_ largest samples
203	float_type threshold_;
204	mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
205	mutable bool is_dirty_;
206	};
207
208	///////////////////////////////////////////////////////////////////////////////
209	// peaks_over_threshold_prob_impl
210	// determines threshold from a given threshold probability using order statistics
211	/**
212	@brief Peaks over Threshold Method for Quantile and Tail Mean Estimation
213
214	@sa peaks_over_threshold_impl
215
216	@param quantile_probability
217	@param pot_threshold_probability
218	*/
219	template<typename Sample, typename LeftRight>
220	struct peaks_over_threshold_prob_impl
221	: accumulator_base
222	{
223	typedef typename numeric::functional::fdiv<Sample, std::size_t>::result_type float_type;
224	// for boost::result_of
225	typedef boost::tuple<float_type, float_type, float_type> result_type;
226	// for left tail fitting, mirror the extreme values
227	typedef mpl::int_<is_same<LeftRight, left>::value ? -`1` : `1`> sign;
228
229	template<typename Args>
230	peaks_over_threshold_prob_impl(Args const &args)
231	: mu_(sign::value * numeric::fdiv(args[sample \| Sample()], (std::size_t)`1`))
232	, sigma2_(numeric::fdiv(args[sample \| Sample()], (std::size_t)`1`))
233	, threshold_probability_(args[pot_threshold_probability])
234	, fit_parameters_(boost::make_tuple(t0: `0.`, t1: `0.`, t2: `0.`))
235	, is_dirty_(true)
236	{
237	}
238
239	void operator ()(dont_care)
240	{
241	this->is_dirty_ = true;
242	}
243
244	template<typename Args>
245	result_type result(Args const &args) const
246	{
247	if (this->is_dirty_)
248	{
249	this->is_dirty_ = false;
250
251	std::size_t cnt = count(args);
252
253	// the n'th cached sample provides an approximate threshold value u
254	std::size_t n = static_cast<std::size_t>(
255	std::ceil(
256	cnt * ( ( is_same<LeftRight, left>::value ) ? this->threshold_probability_ : `1.` - this->threshold_probability_ )
257	)
258	);
259
260	// If n is in a valid range, return result, otherwise return NaN or throw exception
261	if ( n >= static_cast<std::size_t>(tail(args).size()))
262	{
263	if (std::numeric_limits<float_type>::has_quiet_NaN)
264	{
265	return boost::make_tuple(
266	std::numeric_limits<float_type>::quiet_NaN()
267	, std::numeric_limits<float_type>::quiet_NaN()
268	, std::numeric_limits<float_type>::quiet_NaN()
269	);
270	}
271	else
272	{
273	std::ostringstream msg;
274	msg << "index n = " << n << " is not in valid range [0, " << tail(args).size() << ")";
275	boost::throw_exception(e: std::runtime_error (msg.str()));
276	return boost::make_tuple(Sample(`0`), Sample(`0`), Sample(`0`));
277	}
278	}
279	else
280	{
281	float_type u = (tail(args).begin() + n - `1`) sign::value;
282
283	// compute mean and variance of samples above/under threshold value u
284	for (std::size_t i = `0`; i < n; ++i)
285	{
286	mu_ += *(tail(args).begin() + i);
287	sigma2_ += (tail(args).begin() + i) (*(tail(args).begin() + i));
288	}
289
290	this->mu_ = sign::value * numeric::fdiv(this->mu_, n);
291	this->sigma2_ = numeric::fdiv(this->sigma2_, n);
292	this->sigma2_ -= this->mu_ * this->mu_;
293
294	if (is_same<LeftRight, left>::value)
295	this->threshold_probability_ = `1.` - this->threshold_probability_;
296
297	float_type tmp = numeric::fdiv(( this->mu_ - u )( this->mu_ - u ), this*->sigma2_);
298	float_type xi_hat = `0.5` * ( `1.` - tmp );
299	float_type beta_hat = `0.5` * ( this->mu_ - u ) * ( `1.` + tmp );
300	float_type beta_bar = beta_hat * std::pow(`1.` - threshold_probability_, xi_hat);
301	float_type u_bar = u - beta_bar * ( std::pow(`1.` - threshold_probability_, -xi_hat) - `1.`)/xi_hat;
302	this->fit_parameters_ = boost::make_tuple(u_bar, beta_bar, xi_hat);
303	}
304	}
305
306	return this->fit_parameters_;
307	}
308
309	// make this accumulator serializeable
310	// TODO: do we need to split to load/save and verify that threshold did not change?
311	template<class Archive>
312	void serialize(Archive & ar, const unsigned int file_version)
313	{
314	ar & mu_;
315	ar & sigma2_;
316	ar & threshold_probability_;
317	ar & get<`0`>(fit_parameters_);
318	ar & get<`1`>(fit_parameters_);
319	ar & get<`2`>(fit_parameters_);
320	ar & is_dirty_;
321	}
322
323	private:
324	mutable float_type mu_; // mean of samples above threshold u
325	mutable float_type sigma2_; // variance of samples above threshold u
326	mutable float_type threshold_probability_;
327	mutable result_type fit_parameters_; // boost::tuple that stores fit parameters
328	mutable bool is_dirty_;
329	};
330
331	} // namespace impl
332
333	///////////////////////////////////////////////////////////////////////////////
334	// tag::peaks_over_threshold
335	//
336	namespace tag
337	{
338	template<typename LeftRight>
339	struct peaks_over_threshold
340	: depends_on<count>
341	, pot_threshold_value
342	{
343	/// INTERNAL ONLY
344	///
345	typedef accumulators::impl::peaks_over_threshold_impl<mpl::_1, LeftRight> impl;
346	};
347
348	template<typename LeftRight>
349	struct peaks_over_threshold_prob
350	: depends_on<count, tail<LeftRight> >
351	, pot_threshold_probability
352	{
353	/// INTERNAL ONLY
354	///
355	typedef accumulators::impl::peaks_over_threshold_prob_impl<mpl::_1, LeftRight> impl;
356	};
357
358	struct abstract_peaks_over_threshold
359	: depends_on<>
360	{
361	};
362	}
363
364	///////////////////////////////////////////////////////////////////////////////
365	// extract::peaks_over_threshold
366	//
367	namespace extract
368	{
369	extractor<tag::abstract_peaks_over_threshold> const peaks_over_threshold = {};
370
371	BOOST_ACCUMULATORS_IGNORE_GLOBAL(peaks_over_threshold)
372	}
373
374	using extract::peaks_over_threshold;
375
376	// peaks_over_threshold<LeftRight>(with_threshold_value) -> peaks_over_threshold<LeftRight>
377	template<typename LeftRight>
378	struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_value)>
379	{
380	typedef tag::peaks_over_threshold<LeftRight> type;
381	};
382
383	// peaks_over_threshold<LeftRight>(with_threshold_probability) -> peaks_over_threshold_prob<LeftRight>
384	template<typename LeftRight>
385	struct as_feature<tag::peaks_over_threshold<LeftRight>(with_threshold_probability)>
386	{
387	typedef tag::peaks_over_threshold_prob<LeftRight> type;
388	};
389
390	template<typename LeftRight>
391	struct feature_of<tag::peaks_over_threshold<LeftRight> >
392	: feature_of<tag::abstract_peaks_over_threshold>
393	{
394	};
395
396	template<typename LeftRight>
397	struct feature_of<tag::peaks_over_threshold_prob<LeftRight> >
398	: feature_of<tag::abstract_peaks_over_threshold>
399	{
400	};
401
402	// So that peaks_over_threshold can be automatically substituted
403	// with weighted_peaks_over_threshold when the weight parameter is non-void.
404	template<typename LeftRight>
405	struct as_weighted_feature<tag::peaks_over_threshold<LeftRight> >
406	{
407	typedef tag::weighted_peaks_over_threshold<LeftRight> type;
408	};
409
410	template<typename LeftRight>
411	struct feature_of<tag::weighted_peaks_over_threshold<LeftRight> >
412	: feature_of<tag::peaks_over_threshold<LeftRight> >
413	{};
414
415	// So that peaks_over_threshold_prob can be automatically substituted
416	// with weighted_peaks_over_threshold_prob when the weight parameter is non-void.
417	template<typename LeftRight>
418	struct as_weighted_feature<tag::peaks_over_threshold_prob<LeftRight> >
419	{
420	typedef tag::weighted_peaks_over_threshold_prob<LeftRight> type;
421	};
422
423	template<typename LeftRight>
424	struct feature_of<tag::weighted_peaks_over_threshold_prob<LeftRight> >
425	: feature_of<tag::peaks_over_threshold_prob<LeftRight> >
426	{};
427
428	}} // namespace boost::accumulators
429
430	#ifdef _MSC_VER
431	# pragma warning(pop)
432	#endif
433
434	#endif
435

source code of boost/libs/accumulators/include/boost/accumulators/statistics/peaks_over_threshold.hpp