6 #ifndef THEORETICA_HISTOGRAM_H
7 #define THEORETICA_HISTOGRAM_H
9 #ifndef THEORETICA_NO_PRINT
15 #include "../core/real_analysis.h"
16 #include "../core/dataset.h"
35 std::vector<unsigned int> bin_counts;
66 : N(0), value_max(-
inf()), value_min(-
inf()), run_average(0), run_tss(0) {
68 bin_counts.resize(bin_count);
69 this->range_max = range_max;
70 this->range_min = range_min;
81 template<
typename Dataset, enable_vector<Dataset> = true>
82 histogram(
const Dataset& data,
unsigned int bin_count = 0) {
86 value_max = range_max;
87 value_min = range_min;
100 for (
size_t i = 0; i < N; ++i)
101 bin_counts[
index(data[i])]++;
111 if(x < range_min || x > range_max)
115 const real tmp = run_average;
116 run_average = tmp + (x - tmp) / (N + 1);
117 run_tss += (x - tmp) * (x - run_average);
119 value_max = value_max < x ? x : value_max;
120 value_min = value_min > x ? x : value_min;
122 bin_counts[
index(x)]++;
138 return bin_counts.size() - 1;
141 (x - range_min) / (range_max - range_min)
165 inline std::vector<unsigned int>
bins()
const {
213 if (x < range_min || x > range_max)
216 return bin_counts[
index(x)];
225 return bin_counts[i];
232 #ifndef THEORETICA_NO_PRINT
244 bool normalized =
true,
245 bool lower_extreme =
false)
const {
250 std::stringstream res;
251 const real width =
abs(range_max - range_min) / bin_counts.size();
257 for (
size_t i = 0; i < bin_counts.size(); ++i) {
259 res << (range_min + (i + mult) * width) << separator;
262 res << (bin_counts[i] / (
real) N) << std::endl;
264 res << bin_counts[i] << std::endl;
280 std::ostream& out,
const histogram& obj) {
310 if (h.number() <= 1) {
315 return h.tss() / (h.number() - 1);
Histogram class with running statistics; it can be constructed from the parameters of the bins or from a set of data points.
Definition: histogram.h:28
histogram(unsigned int bin_count, real range_min, real range_max)
Construct the histogram from the number of bins and the range.
Definition: histogram.h:65
real min() const
Get the smallest data point of the histogram.
Definition: histogram.h:181
unsigned int index(real x) const
Find the bin index corresponding to a given data point.
Definition: histogram.h:135
real max() const
Get the biggest data point of the histogram.
Definition: histogram.h:173
real operator()(real x)
Evaluate the histogram like a step function which is zero outside the range of the histogram.
Definition: histogram.h:211
real tss() const
Get the total sum of squares (TSS) computed using Welford's one-pass method.
Definition: histogram.h:198
std::vector< unsigned int > bins() const
Get a vector containing the bin counts of each bin.
Definition: histogram.h:165
unsigned int operator[](unsigned int i) const
Get the number of elements in the i-th bin.
Definition: histogram.h:224
std::string to_string(const std::string &separator=" ", bool normalized=true, bool lower_extreme=false) const
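Convert the histogram to a string, writing one line per bin with the bin coordinate, the separator and the bin count (raw, or divided by the number of data points). TO-DO: Cumulative Distribution Function.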
Definition: histogram.h:242
unsigned int number() const
Get the number of data points inside the histogram.
Definition: histogram.h:154
histogram(const Dataset &data, unsigned int bin_count=0)
Construct the histogram from a set of data points, with the given number of bins.
Definition: histogram.h:82
void insert(real x)
Insert a new data point into the histogram, updating the running statistics and the corresponding bin count.
Definition: histogram.h:109
real mean() const
Get the mean value of the histogram data.
Definition: histogram.h:189
friend std::ostream & operator<<(std::ostream &out, const histogram &obj)
Stream the string representation of the histogram to an output stream (std::ostream).
Definition: histogram.h:279
#define TH_MATH_ERROR(F_NAME, VALUE, EXCEPTION)
TH_MATH_ERROR is a macro which throws exceptions or modifies errno (depending on the compilation options).
Definition: error.h:219
std::string string(size_t length)
Generate a random string made of human-readable ASCII characters.
Definition: random.h:102
real stdev(const histogram &h)
Compute the standard deviation of the values of a histogram.
Definition: histogram.h:320
real total_sum_squares(const Dataset &X)
Compute the total sum of squares (TSS) of a given dataset using Welford's one-pass method.
Definition: statistics.h:116
real variance(const histogram &h)
Compute the variance of the values of a histogram.
Definition: histogram.h:308
real mean(const histogram &h)
Compute the mean of the values of a histogram.
Definition: histogram.h:296
real tss(const histogram &h)
Compute the total sum of squares of the values of the histogram.
Definition: histogram.h:302
Main namespace of the library which contains all functions and objects.
Definition: algebra.h:27
double real
A real number, defined as a floating point type.
Definition: constants.h:188
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition: dataset.h:351
dual2 sqrt(dual2 x)
Compute the square root of a second order dual number.
Definition: dual2_functions.h:54
real inf()
Return positive infinity in floating point representation.
Definition: error.h:76
dual2 abs(dual2 x)
Compute the absolute value of a second order dual number.
Definition: dual2_functions.h:183
auto max(const Vector &X)
Finds the maximum value inside a dataset.
Definition: dataset.h:330
constexpr real MACH_EPSILON
Machine epsilon for the real type.
Definition: constants.h:197
real nan()
Return a quiet NaN number in floating point representation.
Definition: error.h:54
TH_CONSTEXPR int floor(real x)
Compute the floor of x, i.e. the largest integer that is not greater than x.
Definition: real_analysis.h:271