6 #ifndef THEORETICA_STATISTICS_H
7 #define THEORETICA_STATISTICS_H
9 #include "../core/constants.h"
10 #include "../core/real_analysis.h"
11 #include "../core/special.h"
12 #include "../calculus/integral.h"
13 #include "../calculus/gauss.h"
14 #include "../core/dataset.h"
29 template<
typename Dataset>
40 template<
typename Dataset>
53 template<
typename Dataset>
56 return range(X) / 2.0;
67 template<
typename Dataset>
85 template<
typename Dataset1,
typename Dataset2>
88 if(sigma.size() !=
mean.size()) {
89 TH_MATH_ERROR(
"propagate_product", sigma.size(), INVALID_ARGUMENT);
95 for (
unsigned int i = 0; i < sigma.size(); ++i) {
115 template<
typename Dataset>
119 TH_MATH_ERROR(
"total_sum_squares", X.size(), INVALID_ARGUMENT);
129 for (
size_t i = 1; i < X.size(); ++i) {
131 const real tmp = avg;
133 avg = tmp + (X[i] - tmp) / (i + 1);
134 s += (X[i] - tmp) * (X[i] - avg);
150 template<
typename Dataset>
153 if(X.size() <= constraints) {
171 template<
typename Dataset>
173 const Dataset& X,
real& out_mean,
174 real& out_variance,
unsigned int constraints = 1) {
176 if(X.size() <= constraints) {
177 TH_MATH_ERROR(
"total_sum_squares", X.size(), INVALID_ARGUMENT);
179 out_variance =
nan();
189 for (
size_t i = 1; i < X.size(); ++i) {
191 const real tmp = avg;
193 avg = tmp + (X[i] - tmp) / (i + 1);
194 tss += (X[i] - tmp) * (X[i] - avg);
198 out_variance =
tss / (X.size() - constraints);
211 template<
typename Dataset>
212 inline real stdev(
const Dataset& data,
unsigned int constraints = 1) {
223 template<
typename Dataset>
238 template<
typename Dataset>
244 TH_MATH_ERROR(
"standard_relative_error", x_mean, DIV_BY_ZERO);
261 template<
typename Dataset1,
typename Dataset2>
263 const Dataset1& X,
const Dataset2& Y,
unsigned int constraints = 1) {
265 if(X.size() != Y.size() || X.size() <= constraints) {
274 for (
unsigned int i = 0; i < X.size(); ++i)
275 s += (X[i] - X_mean) * (Y[i] - Y_mean);
277 return s / (X.size() - constraints);
289 template<
typename Dataset1,
typename Dataset2>
291 const Dataset1& X,
const Dataset2& Y) {
304 template<
typename Dataset>
308 TH_MATH_ERROR(
"autocorrelation", X.size(), INVALID_ARGUMENT);
316 for (
unsigned int i = n; i < X.size(); ++i) {
318 const real delta = X[i] - mu;
319 num += delta * (X[i - n] - mu);
320 den += delta * delta;
333 template<
typename Dataset>
342 return res / X.size();
352 template<
typename Dataset>
362 res +=
cube((x - mu) / sigma);
364 return res / X.size();
374 template<
typename Dataset>
384 res +=
pow((x - mu) / sigma, 4);
386 return (res / X.size()) - 3;
400 template<
typename RealFunction>
421 return (x -
mean) / sigma;
430 template<
typename Dataset>
437 return map([mu, sigma](
real x) {
return z_score(x, mu, sigma); }, X);
452 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
454 const Dataset1& O,
const Dataset2&
E,
const Dataset3& sigma) {
456 if(O.size() !=
E.size() ||
E.size() != sigma.size()) {
463 for (
unsigned int i = 0; i < O.size(); ++i) {
470 c_sqr +=
square((O[i] -
E[i]) / sigma[i]);
500 const real new_x = (chi_sqr - ndf) /
sqrt(2.0 * ndf);
509 }, -new_x, 1
E-16, 25);
519 }, new_x, 1
E-16, 25);
531 if((ndf > 70 && chi_sqr < (ndf / 2.0))) {
535 return pow(
sqrt(x + chi_sqr / 2), ndf - 2) *
exp(-x);
539 return exp((ndf - 2) / 2.0 *
ln(x + chi_sqr / 2) - x);
540 }, 1, ndf / 2, 1
E-12, 25);
548 return pow(
sqrt(x + chi_sqr / 2), ndf - 2);
549 }, tables::laguerre_roots_16, tables::laguerre_weights_16, 16);
565 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
567 const Dataset1& X,
const Dataset2& Y,
568 const Dataset3& sigma,
real intercept,
real slope) {
570 if(X.size() != Y.size() || X.size() != sigma.size()) {
573 X.size(), INVALID_ARGUMENT);
578 for (
unsigned int i = 0; i < X.size(); ++i) {
605 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
607 const Dataset1& X,
const Dataset2& Y,
608 const Dataset3& sigma,
real intercept,
real slope) {
612 Y.size(), INVALID_ARGUMENT);
618 / (
real) (Y.size() - 2);
#define TH_MATH_ERROR(F_NAME, VALUE, EXCEPTION)
TH_MATH_ERROR is a macro which throws exceptions or modifies errno (depending on which compiling opti...
Definition: error.h:219
real chi_squared(real x, unsigned int k)
Chi-squared distribution.
Definition: distributions.h:162
real lngamma(real x)
Log Gamma special function of real argument.
Definition: special.h:59
real pvalue_chi_squared(real chi_sqr, unsigned int ndf)
Compute the (right-tailed) p-value associated to a computed Chi-square value as the integral of the C...
Definition: statistics.h:489
void moments2(const Dataset &X, real &out_mean, real &out_variance, unsigned int constraints=1)
Compute the mean and the variance of a dataset in a single pass, using Welford's method,...
Definition: statistics.h:172
real semidispersion(const Dataset &X)
Computes the maximum semidispersion of a data set defined as .
Definition: statistics.h:54
real stdev(const histogram &h)
Compute the standard deviation of the values of a histogram.
Definition: histogram.h:320
Dataset normalize_z_score(const Dataset &X)
Normalize a data set using Z-score normalization.
Definition: statistics.h:431
real autocorrelation(const Dataset &X, unsigned int n=1)
Compute the lag-n autocorrelation of a dataset as .
Definition: statistics.h:305
real covariance(const Dataset1 &X, const Dataset2 &Y, unsigned int constraints=1)
Compute the covariance between two datasets with the given number of constraints.
Definition: statistics.h:262
real chi_square_linear(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma, real intercept, real slope)
Compute the chi-square on a linear regression, as the sum of the squares of the residuals divided by ...
Definition: statistics.h:566
real propagate_product(const Dataset1 &sigma, const Dataset2 &mean)
Propagate the error over a product of random variables under quadrature, as , where each corresponds...
Definition: statistics.h:86
real standard_relative_error(const Dataset &X)
Compute the relative error on a dataset using estimates of its mean and standard deviation,...
Definition: statistics.h:239
real total_sum_squares(const Dataset &X)
Compute the total sum of squares (TSS) of a given dataset as using Welford's one-pass method.
Definition: statistics.h:116
real variance(const histogram &h)
Compute the variance of the values of a histogram.
Definition: histogram.h:308
real absolute_deviation(const Dataset &X)
Compute the mean absolute deviation of a dataset as .
Definition: statistics.h:334
real gaussian_expectation(RealFunction g, real mean, real sigma)
Compute the expectation value of a given function with respect to a Gaussian distribution with the gi...
Definition: statistics.h:401
real stdom(const Dataset &X)
Compute the standard deviation of the mean given a dataset.
Definition: statistics.h:224
real propagate_sum(const Dataset &sigma)
Propagate the error over a sum of random variables under quadrature, as , where each corresponds to ...
Definition: statistics.h:68
real range(const Dataset &X)
Computes the range of a data set, defined as .
Definition: statistics.h:41
real mean(const histogram &h)
Compute the mean of the values of a histogram.
Definition: histogram.h:296
real skewness(const Dataset &X)
Compute the skewness of a dataset as .
Definition: statistics.h:353
real kurtosis(const Dataset &X)
Compute the normalized kurtosis of a dataset as .
Definition: statistics.h:375
real chi_square(const Dataset1 &O, const Dataset2 &E, const Dataset3 &sigma)
Compute the chi-square from the set of observed quantities, expected quantities and errors.
Definition: statistics.h:453
real tss(const histogram &h)
Compute the total sum of squares of the values of the histogram.
Definition: histogram.h:302
real correlation_coefficient(const Dataset1 &X, const Dataset2 &Y)
Compute Pearson's correlation coefficient R between two datasets.
Definition: statistics.h:290
real z_score(real x, real mean, real sigma)
Compute the Z-score of an observed value with respect to a Gaussian distribution with the given param...
Definition: statistics.h:419
real reduced_chi_square_linear(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma, real intercept, real slope)
Compute the reduced chi-squared on a linear regression, computed as the usual chi-square (computed by...
Definition: statistics.h:606
Main namespace of the library which contains all functions and objects.
Definition: algebra.h:27
double real
A real number, defined as a floating point type.
Definition: constants.h:198
real arithmetic_mean(const Dataset &data)
Compute the arithmetic mean of a set of values.
Definition: dataset.h:375
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition: dataset.h:351
dual2 sqrt(dual2 x)
Compute the square root of a second order dual number.
Definition: dual2_functions.h:54
dual2 ln(dual2 x)
Compute the natural logarithm of a second order dual number.
Definition: dual2_functions.h:151
dual2 abs(dual2 x)
Compute the absolute value of a second order dual number.
Definition: dual2_functions.h:198
constexpr real SQRTPI
The square root of Pi.
Definition: constants.h:234
dual2 exp(dual2 x)
Compute the exponential of a second order dual number.
Definition: dual2_functions.h:138
real integral_inf_riemann(real_function f, real a, real step_sz=1, real tol=CALCULUS_INTEGRAL_TOL, unsigned int max_iter=100)
Integrate a function from a point up to infinity by integrating it by steps, stopping execution when ...
Definition: integral.h:501
auto max(const Vector &X)
Finds the maximum value inside a dataset.
Definition: dataset.h:330
real integral_gauss(RealFunction f, const std::vector< real > &x, const std::vector< real > &w)
Use Gaussian quadrature using the given points and weights.
Definition: integral.h:234
constexpr real MACH_EPSILON
Machine epsilon for the real type.
Definition: constants.h:207
constexpr real E
The Euler mathematical constant (e)
Definition: constants.h:237
real integral_romberg_tol(RealFunction f, real a, real b, real tolerance=CALCULUS_INTEGRAL_TOL)
Approximate the definite integral of an arbitrary function using Romberg's method to the given tolera...
Definition: integral.h:197
real integral_hermite(RealFunction f, const std::vector< real > &x)
Use Gauss-Hermite quadrature of arbitrary degree to approximate an integral over (-inf,...
Definition: integral.h:463
Vector2 & map(Function f, const Vector1 &src, Vector2 &dest)
Get a new vector obtained by applying the function element-wise.
Definition: dataset.h:266
constexpr real SQRT2
The square root of 2.
Definition: constants.h:261
dual2 square(dual2 x)
Return the square of a second order dual number.
Definition: dual2_functions.h:23
real nan()
Return a quiet NaN number in floating point representation.
Definition: error.h:54
auto sum_squares(const Vector &X)
Sum the squares of a set of values.
Definition: dataset.h:127
dual2 pow(dual2 x, int n)
Compute the n-th power of a second order dual number.
Definition: dual2_functions.h:41
dual2 cube(dual2 x)
Return the cube of a second order dual number.
Definition: dual2_functions.h:29