1 | // Copyright John Maddock 2006. |
2 | // Copyright Paul A. Bristow 2006, 2012, 2017. |
3 | // Copyright Thomas Mang 2012. |
4 | |
5 | // Use, modification and distribution are subject to the |
6 | // Boost Software License, Version 1.0. (See accompanying file |
7 | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
8 | |
9 | #ifndef BOOST_STATS_STUDENTS_T_HPP |
10 | #define BOOST_STATS_STUDENTS_T_HPP |
11 | |
12 | // http://en.wikipedia.org/wiki/Student%27s_t_distribution |
13 | // http://www.itl.nist.gov/div898/handbook/eda/section3/eda3664.htm |
14 | |
15 | #include <boost/math/distributions/fwd.hpp> |
16 | #include <boost/math/special_functions/beta.hpp> // for ibeta(a, b, x). |
17 | #include <boost/math/special_functions/digamma.hpp> |
18 | #include <boost/math/distributions/complement.hpp> |
19 | #include <boost/math/distributions/detail/common_error_handling.hpp> |
20 | #include <boost/math/distributions/normal.hpp> |
21 | |
22 | #include <utility> |
23 | |
24 | #ifdef BOOST_MSVC |
25 | # pragma warning(push) |
26 | # pragma warning(disable: 4702) // unreachable code (return after domain_error throw). |
27 | #endif |
28 | |
29 | namespace boost { namespace math { |
30 | |
31 | template <class RealType = double, class Policy = policies::policy<> > |
32 | class students_t_distribution |
33 | { |
34 | public: |
35 | typedef RealType value_type; |
36 | typedef Policy policy_type; |
37 | |
38 | students_t_distribution(RealType df) : df_(df) |
39 | { // Constructor. |
40 | RealType result; |
41 | detail::check_df_gt0_to_inf( // Checks that df > 0 or df == inf. |
42 | "boost::math::students_t_distribution<%1%>::students_t_distribution" , df_, &result, Policy()); |
43 | } // students_t_distribution |
44 | |
45 | RealType degrees_of_freedom()const |
46 | { |
47 | return df_; |
48 | } |
49 | |
50 | // Parameter estimation: |
51 | static RealType find_degrees_of_freedom( |
52 | RealType difference_from_mean, |
53 | RealType alpha, |
54 | RealType beta, |
55 | RealType sd, |
56 | RealType hint = 100); |
57 | |
58 | private: |
59 | // Data member: |
60 | RealType df_; // degrees of freedom is a real number > 0 or +infinity. |
61 | }; |
62 | |
63 | typedef students_t_distribution<double> students_t; // Convenience typedef for double version. |
64 | |
65 | template <class RealType, class Policy> |
66 | inline const std::pair<RealType, RealType> range(const students_t_distribution<RealType, Policy>& /*dist*/) |
67 | { // Range of permissible values for random variable x. |
68 | // Now including infinity. |
69 | using boost::math::tools::max_value; |
70 | //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); |
71 | return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>())); |
72 | } |
73 | |
74 | template <class RealType, class Policy> |
75 | inline const std::pair<RealType, RealType> support(const students_t_distribution<RealType, Policy>& /*dist*/) |
76 | { // Range of supported values for random variable x. |
77 | // Now including infinity. |
78 | // This is range where cdf rises from 0 to 1, and outside it, the pdf is zero. |
79 | using boost::math::tools::max_value; |
80 | //return std::pair<RealType, RealType>(-max_value<RealType>(), max_value<RealType>()); |
81 | return std::pair<RealType, RealType>(((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? -std::numeric_limits<RealType>::infinity() : -max_value<RealType>()), ((::std::numeric_limits<RealType>::is_specialized & ::std::numeric_limits<RealType>::has_infinity) ? +std::numeric_limits<RealType>::infinity() : +max_value<RealType>())); |
82 | } |
83 | |
84 | template <class RealType, class Policy> |
85 | inline RealType pdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x) |
86 | { |
87 | BOOST_FPU_EXCEPTION_GUARD |
88 | BOOST_MATH_STD_USING // for ADL of std functions. |
89 | |
90 | RealType error_result; |
91 | if(false == detail::check_x_not_NaN( |
92 | "boost::math::pdf(const students_t_distribution<%1%>&, %1%)" , x, &error_result, Policy())) |
93 | return error_result; |
94 | RealType df = dist.degrees_of_freedom(); |
95 | if(false == detail::check_df_gt0_to_inf( // Check that df > 0 or == +infinity. |
96 | "boost::math::pdf(const students_t_distribution<%1%>&, %1%)" , df, &error_result, Policy())) |
97 | return error_result; |
98 | |
99 | RealType result; |
100 | if ((boost::math::isinf)(x)) |
101 | { // - or +infinity. |
102 | result = static_cast<RealType>(0); |
103 | return result; |
104 | } |
105 | RealType limit = policies::get_epsilon<RealType, Policy>(); |
106 | // Use policies so that if policy requests lower precision, |
107 | // then get the normal distribution approximation earlier. |
108 | limit = static_cast<RealType>(1) / limit; // 1/eps |
109 | // for 64-bit double 1/eps = 4503599627370496 |
110 | if (df > limit) |
111 | { // Special case for really big degrees_of_freedom > 1 / eps |
112 | // - use normal distribution which is much faster and more accurate. |
113 | normal_distribution<RealType, Policy> n(0, 1); |
114 | result = pdf(n, x); |
115 | } |
116 | else |
117 | { // |
118 | RealType basem1 = x * x / df; |
119 | if(basem1 < 0.125) |
120 | { |
121 | result = exp(-boost::math::log1p(basem1, Policy()) * (1+df) / 2); |
122 | } |
123 | else |
124 | { |
125 | result = pow(1 / (1 + basem1), (df + 1) / 2); |
126 | } |
127 | result /= sqrt(df) * boost::math::beta(df / 2, RealType(0.5f), Policy()); |
128 | } |
129 | return result; |
130 | } // pdf |
131 | |
132 | template <class RealType, class Policy> |
133 | inline RealType cdf(const students_t_distribution<RealType, Policy>& dist, const RealType& x) |
134 | { |
135 | RealType error_result; |
136 | // degrees_of_freedom > 0 or infinity check: |
137 | RealType df = dist.degrees_of_freedom(); |
138 | if (false == detail::check_df_gt0_to_inf( // Check that df > 0 or == +infinity. |
139 | "boost::math::cdf(const students_t_distribution<%1%>&, %1%)" , df, &error_result, Policy())) |
140 | { |
141 | return error_result; |
142 | } |
143 | // Check for bad x first. |
144 | if(false == detail::check_x_not_NaN( |
145 | "boost::math::cdf(const students_t_distribution<%1%>&, %1%)" , x, &error_result, Policy())) |
146 | { |
147 | return error_result; |
148 | } |
149 | if (x == 0) |
150 | { // Special case with exact result. |
151 | return static_cast<RealType>(0.5); |
152 | } |
153 | if ((boost::math::isinf)(x)) |
154 | { // x == - or + infinity, regardless of df. |
155 | return ((x < 0) ? static_cast<RealType>(0) : static_cast<RealType>(1)); |
156 | } |
157 | |
158 | RealType limit = policies::get_epsilon<RealType, Policy>(); |
159 | // Use policies so that if policy requests lower precision, |
160 | // then get the normal distribution approximation earlier. |
161 | limit = static_cast<RealType>(1) / limit; // 1/eps |
162 | // for 64-bit double 1/eps = 4503599627370496 |
163 | if (df > limit) |
164 | { // Special case for really big degrees_of_freedom > 1 / eps (perhaps infinite?) |
165 | // - use normal distribution which is much faster and more accurate. |
166 | normal_distribution<RealType, Policy> n(0, 1); |
167 | RealType result = cdf(n, x); |
168 | return result; |
169 | } |
170 | else |
171 | { // normal df case. |
172 | // |
173 | // Calculate probability of Student's t using the incomplete beta function. |
174 | // probability = ibeta(degrees_of_freedom / 2, 1/2, degrees_of_freedom / (degrees_of_freedom + t*t)) |
175 | // |
176 | // However when t is small compared to the degrees of freedom, that formula |
177 | // suffers from rounding error, use the identity formula to work around |
178 | // the problem: |
179 | // |
180 | // I[x](a,b) = 1 - I[1-x](b,a) |
181 | // |
182 | // and: |
183 | // |
184 | // x = df / (df + t^2) |
185 | // |
186 | // so: |
187 | // |
188 | // 1 - x = t^2 / (df + t^2) |
189 | // |
190 | RealType x2 = x * x; |
191 | RealType probability; |
192 | if(df > 2 * x2) |
193 | { |
194 | RealType z = x2 / (df + x2); |
195 | probability = ibetac(static_cast<RealType>(0.5), df / 2, z, Policy()) / 2; |
196 | } |
197 | else |
198 | { |
199 | RealType z = df / (df + x2); |
200 | probability = ibeta(df / 2, static_cast<RealType>(0.5), z, Policy()) / 2; |
201 | } |
202 | return (x > 0 ? 1 - probability : probability); |
203 | } |
204 | } // cdf |
205 | |
206 | template <class RealType, class Policy> |
207 | inline RealType quantile(const students_t_distribution<RealType, Policy>& dist, const RealType& p) |
208 | { |
209 | BOOST_MATH_STD_USING // for ADL of std functions |
210 | // |
211 | // Obtain parameters: |
212 | RealType probability = p; |
213 | |
214 | // Check for domain errors: |
215 | RealType df = dist.degrees_of_freedom(); |
216 | static const char* function = "boost::math::quantile(const students_t_distribution<%1%>&, %1%)" ; |
217 | RealType error_result; |
218 | if(false == (detail::check_df_gt0_to_inf( // Check that df > 0 or == +infinity. |
219 | function, df, &error_result, Policy()) |
220 | && detail::check_probability(function, probability, &error_result, Policy()))) |
221 | return error_result; |
222 | // Special cases, regardless of degrees_of_freedom. |
223 | if (probability == 0) |
224 | return -policies::raise_overflow_error<RealType>(function, 0, Policy()); |
225 | if (probability == 1) |
226 | return policies::raise_overflow_error<RealType>(function, 0, Policy()); |
227 | if (probability == static_cast<RealType>(0.5)) |
228 | return 0; // |
229 | // |
230 | #if 0 |
231 | // This next block is disabled in favour of a faster method than |
232 | // incomplete beta inverse, but code retained for future reference: |
233 | // |
234 | // Calculate quantile of Student's t using the incomplete beta function inverse: |
235 | probability = (probability > 0.5) ? 1 - probability : probability; |
236 | RealType t, x, y; |
237 | x = ibeta_inv(degrees_of_freedom / 2, RealType(0.5), 2 * probability, &y); |
238 | if(degrees_of_freedom * y > tools::max_value<RealType>() * x) |
239 | t = tools::overflow_error<RealType>(function); |
240 | else |
241 | t = sqrt(degrees_of_freedom * y / x); |
242 | // |
243 | // Figure out sign based on the size of p: |
244 | // |
245 | if(p < 0.5) |
246 | t = -t; |
247 | |
248 | return t; |
249 | #endif |
250 | // |
251 | // Depending on how many digits RealType has, this may forward |
252 | // to the incomplete beta inverse as above. Otherwise uses a |
253 | // faster method that is accurate to ~15 digits everywhere |
254 | // and a couple of epsilon at double precision and in the central |
255 | // region where most use cases will occur... |
256 | // |
257 | return boost::math::detail::fast_students_t_quantile(df, probability, Policy()); |
258 | } // quantile |
259 | |
260 | template <class RealType, class Policy> |
261 | inline RealType cdf(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c) |
262 | { |
263 | return cdf(c.dist, -c.param); |
264 | } |
265 | |
266 | template <class RealType, class Policy> |
267 | inline RealType quantile(const complemented2_type<students_t_distribution<RealType, Policy>, RealType>& c) |
268 | { |
269 | return -quantile(c.dist, c.param); |
270 | } |
271 | |
272 | // |
273 | // Parameter estimation follows: |
274 | // |
275 | namespace detail{ |
276 | // |
277 | // Functors for finding degrees of freedom: |
278 | // |
279 | template <class RealType, class Policy> |
280 | struct sample_size_func |
281 | { |
282 | sample_size_func(RealType a, RealType b, RealType s, RealType d) |
283 | : alpha(a), beta(b), ratio(s*s/(d*d)) {} |
284 | |
285 | RealType operator()(const RealType& df) |
286 | { |
287 | if(df <= tools::min_value<RealType>()) |
288 | { // |
289 | return 1; |
290 | } |
291 | students_t_distribution<RealType, Policy> t(df); |
292 | RealType qa = quantile(complement(t, alpha)); |
293 | RealType qb = quantile(complement(t, beta)); |
294 | qa += qb; |
295 | qa *= qa; |
296 | qa *= ratio; |
297 | qa -= (df + 1); |
298 | return qa; |
299 | } |
300 | RealType alpha, beta, ratio; |
301 | }; |
302 | |
303 | } // namespace detail |
304 | |
305 | template <class RealType, class Policy> |
306 | RealType students_t_distribution<RealType, Policy>::find_degrees_of_freedom( |
307 | RealType difference_from_mean, |
308 | RealType alpha, |
309 | RealType beta, |
310 | RealType sd, |
311 | RealType hint) |
312 | { |
313 | static const char* function = "boost::math::students_t_distribution<%1%>::find_degrees_of_freedom" ; |
314 | // |
315 | // Check for domain errors: |
316 | // |
317 | RealType error_result; |
318 | if(false == detail::check_probability( |
319 | function, alpha, &error_result, Policy()) |
320 | && detail::check_probability(function, beta, &error_result, Policy())) |
321 | return error_result; |
322 | |
323 | if(hint <= 0) |
324 | hint = 1; |
325 | |
326 | detail::sample_size_func<RealType, Policy> f(alpha, beta, sd, difference_from_mean); |
327 | tools::eps_tolerance<RealType> tol(policies::digits<RealType, Policy>()); |
328 | boost::uintmax_t max_iter = policies::get_max_root_iterations<Policy>(); |
329 | std::pair<RealType, RealType> r = tools::bracket_and_solve_root(f, hint, RealType(2), false, tol, max_iter, Policy()); |
330 | RealType result = r.first + (r.second - r.first) / 2; |
331 | if(max_iter >= policies::get_max_root_iterations<Policy>()) |
332 | { |
333 | return policies::raise_evaluation_error<RealType>(function, "Unable to locate solution in a reasonable time:" |
334 | " either there is no answer to how many degrees of freedom are required" |
335 | " or the answer is infinite. Current best guess is %1%" , result, Policy()); |
336 | } |
337 | return result; |
338 | } |
339 | |
340 | template <class RealType, class Policy> |
341 | inline RealType mode(const students_t_distribution<RealType, Policy>& /*dist*/) |
342 | { |
343 | // Assume no checks on degrees of freedom are useful (unlike mean). |
344 | return 0; // Always zero by definition. |
345 | } |
346 | |
347 | template <class RealType, class Policy> |
348 | inline RealType median(const students_t_distribution<RealType, Policy>& /*dist*/) |
349 | { |
350 | // Assume no checks on degrees of freedom are useful (unlike mean). |
351 | return 0; // Always zero by definition. |
352 | } |
353 | |
354 | // See section 5.1 on moments at http://en.wikipedia.org/wiki/Student%27s_t-distribution |
355 | |
356 | template <class RealType, class Policy> |
357 | inline RealType mean(const students_t_distribution<RealType, Policy>& dist) |
358 | { // Revised for https://svn.boost.org/trac/boost/ticket/7177 |
359 | RealType df = dist.degrees_of_freedom(); |
360 | if(((boost::math::isnan)(df)) || (df <= 1) ) |
361 | { // mean is undefined for moment <= 1! |
362 | return policies::raise_domain_error<RealType>( |
363 | "boost::math::mean(students_t_distribution<%1%> const&, %1%)" , |
364 | "Mean is undefined for degrees of freedom < 1 but got %1%." , df, Policy()); |
365 | return std::numeric_limits<RealType>::quiet_NaN(); |
366 | } |
367 | return 0; |
368 | } // mean |
369 | |
370 | template <class RealType, class Policy> |
371 | inline RealType variance(const students_t_distribution<RealType, Policy>& dist) |
372 | { // http://en.wikipedia.org/wiki/Student%27s_t-distribution |
373 | // Revised for https://svn.boost.org/trac/boost/ticket/7177 |
374 | RealType df = dist.degrees_of_freedom(); |
375 | if ((boost::math::isnan)(df) || (df <= 2)) |
376 | { // NaN or undefined for <= 2. |
377 | return policies::raise_domain_error<RealType>( |
378 | "boost::math::variance(students_t_distribution<%1%> const&, %1%)" , |
379 | "variance is undefined for degrees of freedom <= 2, but got %1%." , |
380 | df, Policy()); |
381 | return std::numeric_limits<RealType>::quiet_NaN(); // Undefined. |
382 | } |
383 | if ((boost::math::isinf)(df)) |
384 | { // +infinity. |
385 | return 1; |
386 | } |
387 | RealType limit = policies::get_epsilon<RealType, Policy>(); |
388 | // Use policies so that if policy requests lower precision, |
389 | // then get the normal distribution approximation earlier. |
390 | limit = static_cast<RealType>(1) / limit; // 1/eps |
391 | // for 64-bit double 1/eps = 4503599627370496 |
392 | if (df > limit) |
393 | { // Special case for really big degrees_of_freedom > 1 / eps. |
394 | return 1; |
395 | } |
396 | else |
397 | { |
398 | return df / (df - 2); |
399 | } |
400 | } // variance |
401 | |
402 | template <class RealType, class Policy> |
403 | inline RealType skewness(const students_t_distribution<RealType, Policy>& dist) |
404 | { |
405 | RealType df = dist.degrees_of_freedom(); |
406 | if( ((boost::math::isnan)(df)) || (dist.degrees_of_freedom() <= 3)) |
407 | { // Undefined for moment k = 3. |
408 | return policies::raise_domain_error<RealType>( |
409 | "boost::math::skewness(students_t_distribution<%1%> const&, %1%)" , |
410 | "Skewness is undefined for degrees of freedom <= 3, but got %1%." , |
411 | dist.degrees_of_freedom(), Policy()); |
412 | return std::numeric_limits<RealType>::quiet_NaN(); |
413 | } |
414 | return 0; // For all valid df, including infinity. |
415 | } // skewness |
416 | |
417 | template <class RealType, class Policy> |
418 | inline RealType kurtosis(const students_t_distribution<RealType, Policy>& dist) |
419 | { |
420 | RealType df = dist.degrees_of_freedom(); |
421 | if(((boost::math::isnan)(df)) || (df <= 4)) |
422 | { // Undefined or infinity for moment k = 4. |
423 | return policies::raise_domain_error<RealType>( |
424 | "boost::math::kurtosis(students_t_distribution<%1%> const&, %1%)" , |
425 | "Kurtosis is undefined for degrees of freedom <= 4, but got %1%." , |
426 | df, Policy()); |
427 | return std::numeric_limits<RealType>::quiet_NaN(); // Undefined. |
428 | } |
429 | if ((boost::math::isinf)(df)) |
430 | { // +infinity. |
431 | return 3; |
432 | } |
433 | RealType limit = policies::get_epsilon<RealType, Policy>(); |
434 | // Use policies so that if policy requests lower precision, |
435 | // then get the normal distribution approximation earlier. |
436 | limit = static_cast<RealType>(1) / limit; // 1/eps |
437 | // for 64-bit double 1/eps = 4503599627370496 |
438 | if (df > limit) |
439 | { // Special case for really big degrees_of_freedom > 1 / eps. |
440 | return 3; |
441 | } |
442 | else |
443 | { |
444 | //return 3 * (df - 2) / (df - 4); re-arranged to |
445 | return 6 / (df - 4) + 3; |
446 | } |
447 | } // kurtosis |
448 | |
449 | template <class RealType, class Policy> |
450 | inline RealType kurtosis_excess(const students_t_distribution<RealType, Policy>& dist) |
451 | { |
452 | // see http://mathworld.wolfram.com/Kurtosis.html |
453 | |
454 | RealType df = dist.degrees_of_freedom(); |
455 | if(((boost::math::isnan)(df)) || (df <= 4)) |
456 | { // Undefined or infinity for moment k = 4. |
457 | return policies::raise_domain_error<RealType>( |
458 | "boost::math::kurtosis_excess(students_t_distribution<%1%> const&, %1%)" , |
459 | "Kurtosis_excess is undefined for degrees of freedom <= 4, but got %1%." , |
460 | df, Policy()); |
461 | return std::numeric_limits<RealType>::quiet_NaN(); // Undefined. |
462 | } |
463 | if ((boost::math::isinf)(df)) |
464 | { // +infinity. |
465 | return 0; |
466 | } |
467 | RealType limit = policies::get_epsilon<RealType, Policy>(); |
468 | // Use policies so that if policy requests lower precision, |
469 | // then get the normal distribution approximation earlier. |
470 | limit = static_cast<RealType>(1) / limit; // 1/eps |
471 | // for 64-bit double 1/eps = 4503599627370496 |
472 | if (df > limit) |
473 | { // Special case for really big degrees_of_freedom > 1 / eps. |
474 | return 0; |
475 | } |
476 | else |
477 | { |
478 | return 6 / (df - 4); |
479 | } |
480 | } |
481 | |
482 | template <class RealType, class Policy> |
483 | inline RealType entropy(const students_t_distribution<RealType, Policy>& dist) |
484 | { |
485 | using std::log; |
486 | using std::sqrt; |
487 | RealType v = dist.degrees_of_freedom(); |
488 | RealType vp1 = (v+1)/2; |
489 | RealType vd2 = v/2; |
490 | |
491 | return vp1*(digamma(vp1) - digamma(vd2)) + log(sqrt(v)*beta(vd2, RealType(1)/RealType(2))); |
492 | } |
493 | |
494 | } // namespace math |
495 | } // namespace boost |
496 | |
497 | #ifdef BOOST_MSVC |
498 | # pragma warning(pop) |
499 | #endif |
500 | |
501 | // This include must be at the end, *after* the accessors |
502 | // for this distribution have been defined, in order to |
503 | // keep compilers that support two-phase lookup happy. |
504 | #include <boost/math/distributions/detail/derived_accessors.hpp> |
505 | |
506 | #endif // BOOST_STATS_STUDENTS_T_HPP |
507 | |