6 #ifndef THEORETICA_REGRESSION_H
7 #define THEORETICA_REGRESSION_H
10 #include "../algebra/mat.h"
16 namespace regression {
21 template<
typename Dataset1,
typename Dataset2>
23 const Dataset1& X,
const Dataset2& Y,
27 if(X.size() != Y.size()) {
28 TH_MATH_ERROR(
"regression::ols_linear", X.size(), INVALID_ARGUMENT);
29 intercept =
nan(); slope =
nan();
36 const real Delta = X.size() * sum_sqr_x - sqr_sum_x;
43 intercept =
nan(); slope =
nan();
49 intercept = (sum_sqr_x * sum_y - sum_x * prod_sum_xy) / Delta;
50 slope = (X.size() * prod_sum_xy - sum_x * sum_y) / Delta;
56 template<
typename Dataset1,
typename Dataset2>
58 const Dataset1& X,
const Dataset2& Y,
real sigma_Y,
63 if(X.size() != Y.size()) {
64 TH_MATH_ERROR(
"regression::ols_linear", X.size(), INVALID_ARGUMENT);
65 intercept =
nan(); slope =
nan();
72 const real Delta = X.size() * sum_sqr_x - sqr_sum_x;
79 intercept =
nan(); slope =
nan();
85 intercept = (sum_sqr_x * sum_y - sum_x * sum_xy) / Delta;
86 slope = (X.size() * sum_xy - sum_x * sum_y) / Delta;
89 sigma_A =
sqrt(
square(sigma_Y) * sum_sqr_x / Delta);
90 sigma_B =
sqrt(
square(sigma_Y) * X.size() / Delta);
96 template<
typename Dataset1,
typename Dataset2>
98 const Dataset1& X,
const Dataset2& Y,
real sigma_Y,
102 if(X.size() != Y.size()) {
103 TH_MATH_ERROR(
"regression::ols_linear", X.size(), INVALID_ARGUMENT);
112 const real Delta = X.size() * sum_sqr_x - sqr_sum_x;
119 intercept =
nan(); slope =
nan();
125 intercept = (sum_sqr_x * sum_y - sum_x * sum_xy) / Delta;
126 slope = (X.size() * sum_xy - sum_x * sum_y) / Delta;
138 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
140 const Dataset1& X,
const Dataset2& Y,
const Dataset3& W,
143 if(X.size() != Y.size() || X.size() != W.size()) {
145 intercept =
nan(); slope =
nan();
155 const real Delta = sum_w * sum_xxw -
square(sum_xw);
159 intercept =
nan(); slope =
nan();
163 intercept = (sum_xxw * sum_yw - sum_xw * sum_xyw) / Delta;
164 slope = (sum_w * sum_xyw - sum_xw * sum_yw) / Delta;
176 template<
typename Dataset1,
typename Dataset2>
178 const Dataset1& X,
const Dataset2& Y,
183 if(X.size() != Y.size()) {
195 const real delta_xy = (
product_sum(X, Y) / X.size()) - mean_x * mean_y;
198 slope = (Delta +
sqrt(
199 square(Delta) + 4 *
square(sigma_X * sigma_Y * delta_xy)
200 )) / (2 *
square(sigma_X) * delta_xy);
202 intercept = mean_y - slope * mean_x;
210 template<
typename Dataset1,
typename Dataset2>
212 const Dataset1& X,
const Dataset2& Y,
real sigma_Y,
215 if(X.size() != Y.size()) {
216 TH_MATH_ERROR(
"ols_linear_orig", X.size(), INVALID_ARGUMENT);
231 sigma_B =
sqrt(X.size() *
square(sigma_Y / sum_x));
236 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
238 const Dataset1& X,
const Dataset2& Y,
239 const Dataset3& W,
real& B,
real& sigma_B) {
241 if(X.size() != Y.size() || X.size() != W.size()) {
242 TH_MATH_ERROR(
"wls_linear_orig", X.size(), INVALID_ARGUMENT);
257 sigma_B =
sqrt(
sum(W) / sum_xw);
263 template<
typename Dataset1,
typename Dataset2>
265 const Dataset1& X,
const Dataset2& Y,
268 if(X.size() != Y.size()) {
269 TH_MATH_ERROR(
"ols_linear_error", X.size(), INVALID_ARGUMENT);
274 for (
unsigned int i = 0; i < X.size(); ++i) {
275 err +=
square(Y[i] - intercept - slope * X[i]);
279 return sqrt(err / (
real) (X.size() - 2));
324 template<
typename Dataset1,
typename Dataset2>
332 template<
typename Dataset1,
typename Dataset2>
340 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
342 const Dataset1& X,
const Dataset2& Y,
const Dataset3& sigma_Y) {
349 template<
typename Dataset1,
typename Dataset2>
351 const Dataset1& X,
const Dataset2& Y,
real sigma_X,
real sigma_Y) {
352 fit(X, Y, sigma_X, sigma_Y);
363 template<
typename Dataset1,
typename Dataset2>
364 inline void fit(
const Dataset1& X,
const Dataset2& Y) {
366 if(X.size() != Y.size()) {
367 TH_MATH_ERROR(
"linear_model::fit", X.size(), INVALID_ARGUMENT);
373 TH_MATH_ERROR(
"linear_model::fit", Y.size(), INVALID_ARGUMENT);
399 template<
typename Dataset1,
typename Dataset2>
401 const Dataset1& X,
const Dataset2& Y,
real sigma_Y) {
403 if(X.size() != Y.size()) {
404 TH_MATH_ERROR(
"linear_model::fit", X.size(), INVALID_ARGUMENT);
410 TH_MATH_ERROR(
"linear_model::fit", Y.size(), INVALID_ARGUMENT);
442 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
444 const Dataset1& X,
const Dataset2& Y,
const Dataset3& sigma) {
446 if(X.size() != Y.size()) {
447 TH_MATH_ERROR(
"linear_model::fit", X.size(), INVALID_ARGUMENT);
453 TH_MATH_ERROR(
"linear_model::fit", Y.size(), INVALID_ARGUMENT);
461 return 1.0 / (x * x);
485 template<
typename Dataset1,
typename Dataset2>
487 const Dataset1& X,
const Dataset2& Y,
real sigma_X,
real sigma_Y) {
489 if(X.size() != Y.size()) {
490 TH_MATH_ERROR(
"linear_model::fit", X.size(), INVALID_ARGUMENT);
496 TH_MATH_ERROR(
"linear_model::fit", Y.size(), INVALID_ARGUMENT);
523 #ifndef THEORETICA_NO_PRINT
528 std::stringstream res;
530 res <<
"Model: y = A + B * x" << std::endl;
542 res <<
"Error = " <<
err << std::endl;
543 res <<
"ndf = " <<
ndf << std::endl;
546 res <<
"Chi-squared = " <<
chi_squared << std::endl;
549 res <<
"p-value = " <<
p_value << std::endl;
552 res <<
"Covariance Matrix:\n" <<
covar_mat;
577 template<
typename Dataset1,
typename Dataset2>
579 const Dataset1& X,
const Dataset2& Y) {
581 if(X.size() != Y.size()) {
582 TH_MATH_ERROR(
"ols_linear_intercept", X.size(), INVALID_ARGUMENT);
595 template<
typename Dataset1,
typename Dataset2>
597 const Dataset1& X,
const Dataset2& Y,
real sigma_y) {
606 template<
typename Dataset1,
typename Dataset2>
609 if(X.size() != Y.size()) {
610 TH_MATH_ERROR(
"ols_linear_slope", X.size(), INVALID_ARGUMENT);
623 template<
typename Dataset1,
typename Dataset2>
625 const Dataset1& X,
const Dataset2& Y,
real sigma_y) {
628 return sqrt(X.size() / Delta) *
abs(sigma_y);
634 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
636 const Dataset1& X,
const Dataset2& Y,
const Dataset3& W) {
638 if(X.size() != Y.size() || X.size() != W.size()) {
640 "wls_linear_intercept",
641 X.size(), INVALID_ARGUMENT);
657 template<
typename Dataset1,
typename Dataset2,
typename Dataset3>
659 const Dataset1& X,
const Dataset2& Y,
const Dataset3& W) {
661 if(X.size() != Y.size() || X.size() != W.size()) {
664 X.size(), INVALID_ARGUMENT);
#define TH_MATH_ERROR(F_NAME, VALUE, EXCEPTION)
TH_MATH_ERROR is a macro which throws exceptions or modifies errno (depending on which compiling opti...
Definition: error.h:219
std::string string(size_t length)
Generate a random string made of human-readable ASCII characters.
Definition: random.h:102
void wls_linear(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &W, real &intercept, real &slope, mat2 &covar_mat)
Compute the coefficients of the linear regression using Weighted Least Squares.
Definition: regression.h:139
void ols_linear_orig(const Dataset1 &X, const Dataset2 &Y, real sigma_Y, real &B, real &sigma_B)
Compute the Ordinary Least Squares regression to a line passing through the origin.
Definition: regression.h:211
real ols_linear_slope(const Dataset1 &X, const Dataset2 &Y)
Compute the slope of the least squares linear regression from X and Y.
Definition: regression.h:607
real ols_linear_sigma_B(const Dataset1 &X, const Dataset2 &Y, real sigma_y)
Compute the error on the slope coefficient (B)
Definition: regression.h:624
void wls_linear_orig(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &W, real &B, real &sigma_B)
Compute the Weight Least Squares regression to a line passing through the origin.
Definition: regression.h:237
real ols_linear_error(const Dataset1 &X, const Dataset2 &Y, real intercept, real slope)
Compute the error of the least squares linear regression from the X and Y datasets.
Definition: regression.h:264
real ols_linear_intercept(const Dataset1 &X, const Dataset2 &Y)
Compute the intercept of the least squares linear regression from X and Y.
Definition: regression.h:578
void ols_linear(const Dataset1 &X, const Dataset2 &Y, real &intercept, real &slope)
Compute the coefficients of the linear regression using Ordinary Least Squares.
Definition: regression.h:22
real wls_linear_slope(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &W)
Compute the slope of the weighted least squares linear regression from X and Y.
Definition: regression.h:658
real ols_linear_sigma_A(const Dataset1 &X, const Dataset2 &Y, real sigma_y)
Compute the error on the intercept (A)
Definition: regression.h:596
real wls_linear_intercept(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &W)
Compute the intercept of the weighted least squares linear regression from X and Y.
Definition: regression.h:635
real pvalue_chi_squared(real chi_sqr, unsigned int ndf)
Compute the (right-tailed) p-value associated to a computed Chi-square value as the integral of the C...
Definition: statistics.h:489
real chi_square_linear(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma, real intercept, real slope)
Compute the chi-square on a linear regression, as the sum of the squares of the residuals divided by ...
Definition: statistics.h:566
real mean(const histogram &h)
Compute the mean of the values of a histogram.
Definition: histogram.h:296
Matrix covar_mat(const std::vector< Dataset > &v)
Build the covariance matrix given a vector of datasets by computing the covariance between all couple...
Definition: errorprop.h:32
Main namespace of the library which contains all functions and objects.
Definition: algebra.h:27
double real
A real number, defined as a floating point type.
Definition: constants.h:188
dual2 sqrt(dual2 x)
Compute the square root of a second order dual number.
Definition: dual2_functions.h:54
bool is_nan(const T &x)
Check whether a generic variable is (equivalent to) a NaN number.
Definition: error.h:70
dual2 abs(dual2 x)
Compute the absolute value of a second order dual number.
Definition: dual2_functions.h:183
auto sum(const Vector &X)
Compute the sum of a vector of real values using pairwise summation to reduce round-off error.
Definition: dataset.h:219
Vector & apply(Function f, Vector &X)
Apply a function to a set of values element-wise.
Definition: dataset.h:250
mat< real, 2, 2 > mat2
A 2x2 matrix with real entries.
Definition: algebra_types.h:17
auto product_sum(const Vector &X, const Vector &Y)
Sum the products of two sets of values.
Definition: dataset.h:46
constexpr real MACH_EPSILON
Machine epsilon for the real type.
Definition: constants.h:197
dual2 square(dual2 x)
Return the square of a second order dual number.
Definition: dual2_functions.h:23
real nan()
Return a quiet NaN number in floating point representation.
Definition: error.h:54
auto sum_squares(const Vector &X)
Sum the squares of a set of values.
Definition: dataset.h:127
structure for computation and storage of least squares linear regression results with model .
Definition: regression.h:286
void fit(const Dataset1 &X, const Dataset2 &Y, real sigma_X, real sigma_Y)
Compute the linear regression of two sets of data of the same size using least squares linear regress...
Definition: regression.h:486
linear_model()
Default constructor.
Definition: regression.h:319
void fit(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma)
Compute the linear regression of two sets of data of the same size using least squares linear regress...
Definition: regression.h:443
linear_model(const Dataset1 &X, const Dataset2 &Y, const Dataset3 &sigma_Y)
Construct a linear model from data and compute the fit.
Definition: regression.h:341
real B
Slope.
Definition: regression.h:295
mat2 covar_mat
Covariance matrix of the coefficients A and B.
Definition: regression.h:301
unsigned int ndf
Number of degrees of freedom of the linear regression.
Definition: regression.h:311
real chi_squared
Chi-squared on linearization.
Definition: regression.h:307
linear_model(const Dataset1 &X, const Dataset2 &Y, real sigma_X, real sigma_Y)
Construct a linear model from data and compute the fit.
Definition: regression.h:350
friend std::ostream & operator<<(std::ostream &out, const linear_model &obj)
Stream the vector in string representation to an output stream (std::ostream)
Definition: regression.h:565
real sigma_A
Estimated error on A.
Definition: regression.h:292
real sigma_B
Estimated error on B.
Definition: regression.h:298
linear_model(const Dataset1 &X, const Dataset2 &Y, real sigma_Y)
Construct a linear model from data and compute the fit.
Definition: regression.h:333
real p_value
The p-value associated to the computed Chi-squared.
Definition: regression.h:315
void fit(const Dataset1 &X, const Dataset2 &Y, real sigma_Y)
Compute the linear regression of two sets of data of the same size using ordinary least squares linea...
Definition: regression.h:400
real operator()(real x)
Compute the expected Y value for the given X value, following the computed model.
Definition: regression.h:518
linear_model(const Dataset1 &X, const Dataset2 &Y)
Construct a linear model from data and compute the fit.
Definition: regression.h:325
real err
Total error on linearization.
Definition: regression.h:304
std::string to_string() const
Convert the vector to string representation.
Definition: regression.h:526
void fit(const Dataset1 &X, const Dataset2 &Y)
Compute the linear regression of two sets of data of the same size using ordinary least squares linea...
Definition: regression.h:364
real A
Intercept.
Definition: regression.h:289