Statistical functions. More...

Functions
real	likelihood (const vec< real > &X, const vec< real > &theta, stat_function f)
	Compute the likelihood of a distribution <f> with the given parameters <theta> and measures <X>.

real	log_likelihood (const vec< real > &X, const vec< real > &theta, stat_function f)
	Compute the log likelihood of a distribution <f> with the given parameters <theta> and measures <X>.

template<typename Matrix = mat<real>, typename Dataset = vec<real>, enable_vector< Dataset > = true>
Matrix	covar_mat (const std::vector< Dataset > &v)
	Build the covariance matrix given a vector of datasets by computing the covariance between all pairs of datasets.

template<unsigned int N = 0, typename MultiDualFunction = autodiff::dreal_t<N>(*)(autodiff::dvec_t<N>)>
real	propagerr (MultiDualFunction f, const vec< real, N > &x_best, const vec< real, N > &delta_x)
	Automatically propagate uncertainties under quadrature on an arbitrary function given the uncertainties on the variables, the mean values of the variables and the function itself, by using automatic differentiation.

template<unsigned int N = 0, typename Matrix , enable_matrix< Matrix > = true, typename MultiDualFunction = autodiff::dreal_t<N>(*)(autodiff::dvec_t<N>)>
real	propagerr (MultiDualFunction f, const vec< real, N > &x_best, const Matrix &cm)
	Automatically propagate uncertainties under quadrature on an arbitrary function given the uncertainties on the variables, the mean values of the variables and the function itself, by using automatic differentiation.

template<unsigned int N = 0, typename MultiDualFunction = multidual<N>(*)(autodiff::dvec_t<N>), typename Dataset = vec<real, N>>
real	propagerr (MultiDualFunction f, const std::vector< Dataset > &v)
	Automatically propagate uncertainties under quadrature on an arbitrary function given the function and the set of measured data.

template<typename Function >
real	propagerr_mc (Function f, std::vector< pdf_sampler > &rv, unsigned int N=1E+6)
	Propagate the statistical error on a given function using the Monte Carlo method, by generating a sample following the probability distribution of the function and computing its standard deviation.

real	mean (const histogram &h)
	Compute the mean of the values of a histogram.

real	tss (const histogram &h)
	Compute the total sum of squares of the values of the histogram.

real	variance (const histogram &h)
	Compute the variance of the values of a histogram.

real	stdev (const histogram &h)
	Compute the standard deviation of the values of a histogram.

template<typename Dataset >
real	mean (const Dataset &X)
	Compute the mean of a dataset.

template<typename Dataset >
real	range (const Dataset &X)
	Computes the range of a data set, defined as \(x_{max} - x_{min}\).

template<typename Dataset >
real	semidispersion (const Dataset &X)
	Computes the maximum semidispersion of a data set defined as \((x_{max} - x_{min}) / 2\).

template<typename Dataset >
real	propagate_sum (const Dataset &sigma)
	Propagate the error over a sum of random variables under quadrature, as \(\sqrt{\sum_{i = 1}^n \sigma_i^2}\), where each \(\sigma_i\) corresponds to the standard deviation of a variable.

template<typename Dataset1 , typename Dataset2 >
real	propagate_product (const Dataset1 &sigma, const Dataset2 &mean)
	Propagate the error over a product of random variables under quadrature, as \(\sqrt{\sum_{i = 1}^n (\sigma_i / \mu_i)^2}\), where each \(\sigma_i\) corresponds to the standard deviation of a variable and each \(\mu_i\) to its mean value.

template<typename Dataset >
real	total_sum_squares (const Dataset &X)
	Compute the total sum of squares (TSS) of a given dataset as \(\sum_{i = 1}^n (x_i - \hat \mu)^2\), where \(\hat \mu\) is the mean of the dataset, using Welford's one-pass method.

template<typename Dataset >
real	variance (const Dataset &X, unsigned int constraints=1)
	Compute the variance given a dataset and the number of constraints.

template<typename Dataset >
void	moments2 (const Dataset &X, real &out_mean, real &out_variance, unsigned int constraints=1)
	Compute the mean and the variance of a dataset in a single pass, using Welford's method, with the given number of constraints (defaults to 1 for Bessel's correction).

template<typename Dataset >
real	stdev (const Dataset &data, unsigned int constraints=1)
	Compute the standard deviation given a dataset and the number of constraints.

template<typename Dataset >
real	stdom (const Dataset &X)
	Compute the standard deviation of the mean given a dataset.

template<typename Dataset >
real	standard_relative_error (const Dataset &X)
	Compute the relative error on a dataset using estimates of its mean and standard deviation, with the given number of constraints (defaults to 1 for Bessel's correction).

template<typename Dataset1 , typename Dataset2 >
real	covariance (const Dataset1 &X, const Dataset2 &Y, unsigned int constraints=1)
	Compute the covariance between two datasets with the given number of constraints.

template<typename Dataset1 , typename Dataset2 >
real	correlation_coefficient (const Dataset1 &X, const Dataset2 &Y)
	Compute Pearson's correlation coefficient R between two datasets.

template<typename Dataset >
real	autocorrelation (const Dataset &X, unsigned int n=1)
	Compute the lag-n autocorrelation of a dataset.

template<typename Dataset >
real	absolute_deviation (const Dataset &X)
	Compute the mean absolute deviation of a dataset as \(\frac{\sum_{i = 1}^n |x_i - \hat \mu|}{n}\).

template<typename Dataset >
real	skewness (const Dataset &X)
	Compute the skewness of a dataset as \(\frac{\sum_{i=1}^n (\frac{x_i - \hat \mu}{\hat \sigma})^3}{n}\).

template<typename Dataset >
real	kurtosis (const Dataset &X)
	Compute the normalized kurtosis of a dataset as \(\frac{\sum_{i=1}^n (\frac{x_i - \hat \mu}{\hat \sigma})^4}{n} - 3\).

template<typename RealFunction >
real	gaussian_expectation (RealFunction g, real mean, real sigma)
	Compute the expectation value of a given function with respect to a Gaussian distribution with the given parameters.

real	z_score (real x, real mean, real sigma)
	Compute the Z-score of an observed value with respect to a Gaussian distribution with the given parameters.

template<typename Dataset >
Dataset	normalize_z_score (const Dataset &X)
	Normalize a data set using Z-score normalization.

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >
real	chi_square (const Dataset1 &O, const Dataset2 &E, const Dataset3 &sigma)
	Compute the chi-square from the set of observed quantities, expected quantities and errors.

real	pvalue_chi_squared (real chi_sqr, unsigned int ndf)
	Compute the right-tailed p-value associated with a computed Chi-square value, as the integral of the Chi-squared distribution from the given value to infinity.

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >
real	chi_square_linear (const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma, real intercept, real slope)
	Compute the chi-square on a linear regression, as the sum of the squares of the residuals, each residual being divided by the standard deviation of the corresponding point.

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >
real	reduced_chi_square_linear (const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma, real intercept, real slope)
	Compute the reduced chi-squared on a linear regression, computed as the usual chi-square (computed by chi_square_linear) divided by the number of degrees of freedom of the model ( \(N - 2\)).

Detailed Description

Statistical functions.

Function Documentation

◆ absolute_deviation()

template<typename Dataset >

real theoretica::stats::absolute_deviation ( const Dataset & X )

inline

Compute the mean absolute deviation of a dataset as \(\frac{\sum_{i = 1}^n |x_i - \hat \mu|}{n}\).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The mean absolute deviation of the dataset

◆ autocorrelation()

template<typename Dataset >

real theoretica::stats::autocorrelation	(	const Dataset &	X,
		unsigned int	n = `1`
	)

inline

Compute the lag-n autocorrelation of a dataset.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset
n	The lag (defaults to lag-1)

Returns: The lag-n autocorrelation of the given dataset

◆ chi_square()

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >

real theoretica::stats::chi_square	(	const Dataset1 &	O,
		const Dataset2 &	E,
		const Dataset3 &	sigma
	)

inline

Compute the chi-square from the set of observed quantities, expected quantities and errors.

The provided sets should all have the same size.

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values
Dataset3	Any type representing a dataset as a vector of values

Parameters

O	The set of observed values
E	The set of expected values
sigma	The set of standard deviations on the observations

Returns: The computed Chi-squared

◆ chi_square_linear()

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >

real theoretica::stats::chi_square_linear	(	const Dataset1 &	X,
		const Dataset2 &	Y,
		const Dataset3 &	sigma,
		real	intercept,
		real	slope
	)

inline

Compute the chi-square on a linear regression, as the sum of the squares of the residuals, each residual being divided by the standard deviation of the corresponding point.

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values
Dataset3	Any type representing a dataset as a vector of values

Parameters

X	A vector of the X values of the sample
Y	A vector of the Y values of the sample
sigma	The standard deviations of each point of the sample
intercept	The intercept of the linear model
slope	The slope of the linear model

◆ correlation_coefficient()

template<typename Dataset1 , typename Dataset2 >

real theoretica::stats::correlation_coefficient	(	const Dataset1 &	X,
		const Dataset2 &	Y
	)

inline

Compute Pearson's correlation coefficient R between two datasets.

The two datasets must have the same size.

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values

Parameters

X	The first dataset
Y	The second dataset

Returns: The correlation coefficient computed using Pearson's formula

◆ covar_mat()

template<typename Matrix = mat<real>, typename Dataset = vec<real>, enable_vector< Dataset > = true>

Matrix theoretica::stats::covar_mat ( const std::vector< Dataset > & v )

inline

Build the covariance matrix given a vector of datasets by computing the covariance between all pairs of datasets.

Parameters

v	A vector of datasets of measures

Returns: The covariance matrix of the datasets

◆ covariance()

template<typename Dataset1 , typename Dataset2 >

real theoretica::stats::covariance	(	const Dataset1 &	X,
		const Dataset2 &	Y,
		unsigned int	constraints = `1`
	)

inline

Compute the covariance between two datasets with the given number of constraints.

The two datasets must have the same size.

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values

Parameters

X	The first dataset
Y	The second dataset
constraints	The number of constraints (defaults to 1 for Bessel's correction).

Returns: The covariance between X and Y

◆ gaussian_expectation()

template<typename RealFunction >

real theoretica::stats::gaussian_expectation	(	RealFunction	g,
		real	mean,
		real	sigma
	)

inline

Compute the expectation value of a given function with respect to a Gaussian distribution with the given parameters.

This function uses Gauss-Hermite quadrature to compute the integral \(\int_{-\infty}^{+\infty} g(x) e^{-x^2} dx\).

Template Parameters

RealFunction A function or lambda representing a univariate real function

Parameters

mean	The mean of the Gaussian distribution
sigma	The standard deviation of the Gaussian distribution
g	The function to compute the expectation of

Returns: The Gaussian expectation of the given function

◆ kurtosis()

template<typename Dataset >

real theoretica::stats::kurtosis ( const Dataset & X )

inline

Compute the normalized kurtosis of a dataset as \(\frac{\sum_{i=1}^n (\frac{x_i - \hat \mu}{\hat \sigma})^4}{n} - 3\).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The normalized kurtosis of the dataset

◆ likelihood()

real theoretica::stats::likelihood	(	const vec< real > &	X,
		const vec< real > &	theta,
		stat_function	f
	)

inline

Compute the likelihood of a distribution <f> with the given parameters <theta> and measures <X>.

Parameters

X	The dataset of the sample
theta	The parameters of the distribution
f	The statistical distribution function

Returns: The likelihood of the given sample

◆ log_likelihood()

real theoretica::stats::log_likelihood	(	const vec< real > &	X,
		const vec< real > &	theta,
		stat_function	f
	)

inline

Compute the log likelihood of a distribution <f> with the given parameters <theta> and measures <X>.

Parameters

X	The dataset of the sample
theta	The parameters of the distribution
f	The statistical distribution function

Returns: The log-likelihood of the given sample

◆ mean()

template<typename Dataset >

real theoretica::stats::mean ( const Dataset & X )

inline

Compute the mean of a dataset.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The mean of the dataset

◆ moments2()

template<typename Dataset >

void theoretica::stats::moments2	(	const Dataset &	X,
		real &	out_mean,
		real &	out_variance,
		unsigned int	constraints = `1`
	)

inline

Compute the mean and the variance of a dataset in a single pass, using Welford's method, with the given number of constraints (defaults to 1 for Bessel's correction).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset
out_mean	A reference to overwrite with the computed mean
out_variance	A reference to overwrite with the computed variance
constraints	The number of constraints (defaults to 1)

◆ normalize_z_score()

template<typename Dataset >

Dataset theoretica::stats::normalize_z_score ( const Dataset & X )

inline

Normalize a data set using Z-score normalization.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The data set to normalize

Returns: The normalized data set

◆ propagate_product()

template<typename Dataset1 , typename Dataset2 >

real theoretica::stats::propagate_product	(	const Dataset1 &	sigma,
		const Dataset2 &	mean
	)

inline

Propagate the error over a product of random variables under quadrature, as \(\sqrt{\sum_{i = 1}^n (\sigma_i / \mu_i)^2}\), where each \(\sigma_i\) corresponds to the standard deviation of a variable and each \(\mu_i\) to its mean value.

The random variables are assumed to be statistically independent and the result is the relative error over the product.

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values

Parameters

sigma	The vector of standard deviations
mean	The vector of the mean values

Returns: The propagated relative error over the product

◆ propagate_sum()

template<typename Dataset >

real theoretica::stats::propagate_sum ( const Dataset & sigma )

inline

Propagate the error over a sum of random variables under quadrature, as \(\sqrt{\sum_{i = 1}^n \sigma_i^2}\), where each \(\sigma_i\) corresponds to the standard deviation of a variable.

The random variables are assumed to be statistically independent.

Parameters

sigma The vector of standard deviations

Returns: The propagated error over the sum

◆ propagerr() [1/3]

template<unsigned int N = 0, typename MultiDualFunction = multidual<N>(*)(autodiff::dvec_t<N>), typename Dataset = vec<real, N>>

real theoretica::stats::propagerr	(	MultiDualFunction	f,
		const std::vector< Dataset > &	v
	)

inline

Automatically propagate uncertainties under quadrature on an arbitrary function given the function and the set of measured data.

The covar_mat function is used to estimate the covariance matrix from the data sets. For this to work, the data sets should have the same size, so as to estimate their covariance.

Parameters

f	The function to propagate error on
v	A vector of different datasets of the measures of the variables

Returns: The propagated error on the function

◆ propagerr() [2/3]

template<unsigned int N = 0, typename Matrix , enable_matrix< Matrix > = true, typename MultiDualFunction = autodiff::dreal_t<N>(*)(autodiff::dvec_t<N>)>

real theoretica::stats::propagerr	(	MultiDualFunction	f,
		const vec< real, N > &	x_best,
		const Matrix &	cm
	)

inline

Automatically propagate uncertainties under quadrature on an arbitrary function given the uncertainties on the variables, the mean values of the variables and the function itself, by using automatic differentiation.

Parameters

f	The function to propagate error on
x_best	Best values for the variables
cm	Covariance matrix of the variables, where diagonal entries are the variance of the variables and off-diagonal entries are the covariance between different variables. May be constructed from datasets using the function covar_mat.

Returns: The propagated error on the function

◆ propagerr() [3/3]

template<unsigned int N = 0, typename MultiDualFunction = autodiff::dreal_t<N>(*)(autodiff::dvec_t<N>)>

real theoretica::stats::propagerr	(	MultiDualFunction	f,
		const vec< real, N > &	x_best,
		const vec< real, N > &	delta_x
	)

inline

Automatically propagate uncertainties under quadrature on an arbitrary function given the uncertainties on the variables, the mean values of the variables and the function itself, by using automatic differentiation.

This function assumes that the correlation between different variables is zero, if that is not the case, the covariance matrix should be used.

Parameters

f	The function to propagate error on
x_best	Best values for the variables
delta_x	Vector of uncertainties on the variables

Returns: The propagated error on the function

◆ propagerr_mc()

template<typename Function >

real theoretica::stats::propagerr_mc	(	Function	f,
		std::vector< pdf_sampler > &	rv,
		unsigned int	N = `1E+6`
	)

Propagate the statistical error on a given function using the Monte Carlo method, by generating a sample following the probability distribution of the function and computing its standard deviation.

N sample vectors of size M are generated by sampling the M different pdf_sampler distributions which correspond to the input variables of the function. The resulting sample is used to estimate the standard deviation over the result of the function.

Parameters

f	The function to propagate error on
rv	A list of distribution samplers which sample from the probability distributions of the random variables.
N	The number of sampled values to use, defaults to 1 million.

Returns: The standard deviation of the Monte Carlo sample

◆ pvalue_chi_squared()

real theoretica::stats::pvalue_chi_squared	(	real	chi_sqr,
		unsigned int	ndf
	)

inline

Compute the right-tailed p-value associated with a computed Chi-square value, as the integral of the Chi-squared distribution from the given value to infinity.

An equivalent integral is computed using Gauss-Laguerre quadrature: \( p = \frac{e^{-X^2}}{2 \Gamma (k/2)} \int_0^{+\infty} (\sqrt{x + X^2})^{k - 2} e^{-x} dx \)

Parameters

chi_sqr	The computed Chi-squared
ndf	Number of Degrees of Freedom

Returns: The computed p-value

Note: The current implementation has reduced precision for 260 <= ndf < 1000 because for ndf >= 260 the Gaussian approximation is used, which becomes more precise the higher the ndf.

◆ range()

template<typename Dataset >

real theoretica::stats::range ( const Dataset & X )

inline

Computes the range of a data set, defined as \(x_{max} - x_{min}\).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The range of the values of the dataset

◆ reduced_chi_square_linear()

template<typename Dataset1 , typename Dataset2 , typename Dataset3 >

real theoretica::stats::reduced_chi_square_linear	(	const Dataset1 &	X,
		const Dataset2 &	Y,
		const Dataset3 &	sigma,
		real	intercept,
		real	slope
	)

inline

Compute the reduced chi-squared on a linear regression, computed as the usual chi-square (computed by chi_square_linear) divided by the number of degrees of freedom of the model ( \(N - 2\)).

Template Parameters

Dataset1	Any type representing a dataset as a vector of values
Dataset2	Any type representing a dataset as a vector of values
Dataset3	Any type representing a dataset as a vector of values

Parameters

X	A vector of the X values of the sample
Y	A vector of the Y values of the sample
sigma	The standard deviations of each point of the sample
intercept	The intercept of the linear model
slope	The slope of the linear model

◆ semidispersion()

template<typename Dataset >

real theoretica::stats::semidispersion ( const Dataset & X )

inline

Computes the maximum semidispersion of a data set defined as \((x_{max} - x_{min}) / 2\).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The maximum semidispersion of the dataset

◆ skewness()

template<typename Dataset >

real theoretica::stats::skewness ( const Dataset & X )

inline

Compute the skewness of a dataset as \(\frac{\sum_{i=1}^n (\frac{x_i - \hat \mu}{\hat \sigma})^3}{n}\).

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The skewness of the dataset

◆ standard_relative_error()

template<typename Dataset >

real theoretica::stats::standard_relative_error ( const Dataset & X )

inline

Compute the relative error on a dataset using estimates of its mean and standard deviation, with the given number of constraints (defaults to 1 for Bessel's correction).

The relative error is computed as \(\epsilon_{rel} = \frac{\sigma}{\mu}\) and is not multiplied by 100.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset
constraints	The number of constraints for the estimators (defaults to 1)

Returns: The standard relative error on the dataset

◆ stdev()

template<typename Dataset >

real theoretica::stats::stdev	(	const Dataset &	data,
		unsigned int	constraints = `1`
	)

inline

Compute the standard deviation given a dataset and the number of constraints.

Welford's one-pass method is used. The number of constraints defaults to 1, applying Bessel's correction. A value of 0 may be used to compute the population standard deviation.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

data	The dataset
constraints	The number of constraints, defaults to 1

Returns: The standard deviation of the dataset

◆ stdom()

template<typename Dataset >

real theoretica::stats::stdom ( const Dataset & X )

inline

Compute the standard deviation of the mean given a dataset.

Welford's one-pass method is used and Bessel's correction is applied.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset

Returns: The standard deviation of the mean

◆ total_sum_squares()

template<typename Dataset >

real theoretica::stats::total_sum_squares ( const Dataset & X )

inline

Compute the total sum of squares (TSS) of a given dataset as \(\sum_{i = 1}^n (x_i - \hat \mu)^2\), where \(\hat \mu\) is the mean of the dataset, using Welford's one-pass method.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset to compute the TSS of

Returns: The total sum of squares of the given dataset

◆ variance()

template<typename Dataset >

real theoretica::stats::variance	(	const Dataset &	X,
		unsigned int	constraints = `1`
	)

inline

Compute the variance given a dataset and the number of constraints.

Welford's one-pass method is used. The number of constraints defaults to 1, applying Bessel's correction. A value of 0 may be used to compute the population variance.

Template Parameters

Dataset Any type representing a dataset as a vector of values

Parameters

X	The dataset
constraints	The number of constraints, defaults to 1

Returns: The variance of the dataset

◆ z_score()

real theoretica::stats::z_score	(	real	x,
		real	mean,
		real	sigma
	)

inline

Compute the Z-score of an observed value with respect to a Gaussian distribution with the given parameters.

Parameters

x	The observed value
mean	The mean of the distribution
sigma	The standard deviation of the distribution

Returns: The Z-score of x, computed as (x - mean) / sigma

Functions

Detailed Description

Function Documentation

◆ absolute_deviation()

◆ autocorrelation()

◆ chi_square()

◆ chi_square_linear()

◆ correlation_coefficient()

◆ covar_mat()

◆ covariance()

◆ gaussian_expectation()

◆ kurtosis()

◆ likelihood()

◆ log_likelihood()

◆ mean()

◆ moments2()

◆ normalize_z_score()

◆ propagate_product()

◆ propagate_sum()

◆ propagerr() [1/3]

◆ propagerr() [2/3]

◆ propagerr() [3/3]

◆ propagerr_mc()

◆ pvalue_chi_squared()

◆ range()

◆ reduced_chi_square_linear()

◆ semidispersion()

◆ skewness()

◆ standard_relative_error()

◆ stdev()

◆ stdom()

◆ total_sum_squares()

◆ variance()

◆ z_score()