Theoretica
A C++ numerical and automatic mathematical library
Loading...
Searching...
No Matches
histogram.h
Go to the documentation of this file.
1
5
6#ifndef THEORETICA_HISTOGRAM_H
7#define THEORETICA_HISTOGRAM_H
8
9#ifndef THEORETICA_NO_PRINT
10#include <sstream>
11#include <ostream>
12#endif
13
14#include <vector>
15#include "../core/real_analysis.h"
16#include "../core/dataset.h"
17#include "./statistics.h"
18
19
20namespace theoretica {
21
22
28 class histogram {
29 private:
30
32 size_t N {0};
33
35 std::vector<unsigned int> bin_counts;
36
38 real range_max;
39
41 real range_min;
42
44 real value_max;
45
47 real value_min;
48
50 real run_average;
51
53 real run_tss;
54
55 public:
56
65 histogram(unsigned int bin_count, real range_min, real range_max)
66 : N(0), value_max(-inf()), value_min(-inf()), run_average(0), run_tss(0) {
67
68 bin_counts.resize(bin_count);
69 this->range_max = range_max;
70 this->range_min = range_min;
71 }
72
73
81 template<typename Dataset, enable_vector<Dataset> = true>
82 histogram(const Dataset& data, unsigned int bin_count = 0) {
83
84 range_max = theoretica::max(data);
85 range_min = theoretica::min(data);
86 value_max = range_max;
87 value_min = range_min;
88 N = data.size();
89
90 // Compute mean and TSS
91 run_average = stats::mean(data);
92 run_tss = stats::total_sum_squares(data);
93
94 // Default bin count is sqrt(N)
95 bin_counts.resize(
97 );
98
99 // The histogram contains all the data points by construction
100 for (size_t i = 0; i < N; ++i)
101 bin_counts[index(data[i])]++;
102 }
103
104
109 inline void insert(real x) {
110
111 if(x < range_min || x > range_max)
112 return;
113
114 // Update average and TSS using Welford's method
115 const real tmp = run_average;
116 run_average = tmp + (x - tmp) / (N + 1);
117 run_tss += (x - tmp) * (x - run_average);
118
119 value_max = value_max < x ? x : value_max;
120 value_min = value_min > x ? x : value_min;
121
122 bin_counts[index(x)]++;
123 N++;
124 }
125
126
135 inline unsigned int index(real x) const {
136
137 if(abs(x - range_max) < MACH_EPSILON)
138 return bin_counts.size() - 1;
139
140 return floor(
141 (x - range_min) / (range_max - range_min)
142 * bin_counts.size()
143 );
144 }
145
146
147 // Statistical functions
148
149
154 inline unsigned int number() const {
155 return N;
156 }
157
158
165 inline std::vector<unsigned int> bins() const {
166 return bin_counts;
167 }
168
169
173 inline real max() const {
174 return value_max;
175 }
176
177
181 inline real min() const {
182 return value_min;
183 }
184
185
189 inline real mean() const {
190 return run_average;
191 }
192
193
198 inline real tss() const {
199 return run_tss;
200 }
201
202
203 // Operators
204
205
211 inline real operator()(real x) {
212
213 if (x < range_min || x > range_max)
214 return 0.0;
215
216 return bin_counts[index(x)];
217 }
218
219
224 inline unsigned int operator[](unsigned int i) const {
225 return bin_counts[i];
226 }
227
228
230
231
232#ifndef THEORETICA_NO_PRINT
233
242 inline std::string to_string(
243 const std::string& separator = " ",
244 bool normalized = true,
245 bool lower_extreme = false) const {
246
247 if(N == 0)
248 return "";
249
250 std::stringstream res;
251 const real width = abs(range_max - range_min) / bin_counts.size();
252 real mult = 0.5;
253
254 if (lower_extreme)
255 mult = 0.0;
256
257 for (size_t i = 0; i < bin_counts.size(); ++i) {
258
259 res << (range_min + (i + mult) * width) << separator;
260
261 if (normalized)
262 res << (bin_counts[i] / (real) N) << std::endl;
263 else
264 res << bin_counts[i] << std::endl;
265 }
266
267 return res.str();
268 }
269
270
272 inline operator std::string() {
273 return to_string();
274 }
275
276
279 inline friend std::ostream& operator<<(
280 std::ostream& out, const histogram& obj) {
281 return out << obj.to_string();
282 }
283
284#endif
285
286 };
287
288
289 // Statistical functions over elements of a histogram
290
291
292 namespace stats {
293
294
296 inline real mean(const histogram& h) {
297 return h.mean();
298 }
299
300
302 inline real tss(const histogram& h) {
303 return h.tss();
304 }
305
306
308 inline real variance(const histogram& h) {
309
310 if (h.number() <= 1) {
311 TH_MATH_ERROR("variance", h.number(), DIV_BY_ZERO);
312 return nan();
313 }
314
315 return h.tss() / (h.number() - 1);
316 }
317
318
320 inline real stdev(const histogram& h) {
321 return sqrt(variance(h));
322 }
323 }
324
325
327 inline real max(const histogram& h) {
328 return h.max();
329 }
330
331
333 inline real min(const histogram& h) {
334 return h.min();
335 }
336
337}
338
339#endif
Histogram class with running statistics, can be constructed from the parameters of the bins or from a...
Definition histogram.h:28
histogram(unsigned int bin_count, real range_min, real range_max)
Construct the histogram from the number of bins and the range.
Definition histogram.h:65
real min() const
Get the smallest data point of the histogram.
Definition histogram.h:181
unsigned int index(real x) const
Find the bin index corresponding to a given data point.
Definition histogram.h:135
friend std::ostream & operator<<(std::ostream &out, const histogram &obj)
Stream the histogram in string representation to an output stream (std::ostream)
Definition histogram.h:279
real max() const
Get the biggest data point of the histogram.
Definition histogram.h:173
real operator()(real x)
Evaluate the histogram like a step function which is zero outside the range of the histogram.
Definition histogram.h:211
real tss() const
Get the total sum of squares (TSS) computed using Welford's one-pass method.
Definition histogram.h:198
unsigned int operator[](unsigned int i) const
Get the number of elements in the i-th bin.
Definition histogram.h:224
std::string to_string(const std::string &separator=" ", bool normalized=true, bool lower_extreme=false) const
Convert the histogram to string representation, one line per bin. (TO-DO: Cumulative Distribution Function.)
Definition histogram.h:242
unsigned int number() const
Get the number of data points inside the histogram.
Definition histogram.h:154
histogram(const Dataset &data, unsigned int bin_count=0)
Construct the histogram from a set of data points, with the given number of bins.
Definition histogram.h:82
void insert(real x)
Insert a new data point inside the histogram, updating the running statistics and the corresponding b...
Definition histogram.h:109
real mean() const
Get the mean value of the histogram data.
Definition histogram.h:189
std::vector< unsigned int > bins() const
Get a vector containing the bin counts of each bin.
Definition histogram.h:165
#define TH_MATH_ERROR(F_NAME, VALUE, EXCEPTION)
TH_MATH_ERROR is a macro which throws exceptions or modifies errno (depending on which compiling opti...
Definition error.h:219
real stdev(const histogram &h)
Compute the standard deviation of the values of a histogram.
Definition histogram.h:320
real total_sum_squares(const Dataset &X)
Compute the total sum of squares (TSS) of a given dataset using Welford's one-pass method.
Definition statistics.h:116
real variance(const histogram &h)
Compute the variance of the values of a histogram.
Definition histogram.h:308
real mean(const histogram &h)
Compute the mean of the values of a histogram.
Definition histogram.h:296
real tss(const histogram &h)
Compute the total sum of squares of the values of the histogram.
Definition histogram.h:302
Main namespace of the library which contains all functions and objects.
Definition algebra.h:27
double real
A real number, defined as a floating point type.
Definition constants.h:198
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition dataset.h:351
dual2 sqrt(dual2 x)
Compute the square root of a second order dual number.
Definition dual2_functions.h:54
real inf()
Return positive infinity in floating point representation.
Definition error.h:76
dual2 abs(dual2 x)
Compute the absolute value of a second order dual number.
Definition dual2_functions.h:198
std::remove_reference_t< decltype(std::declval< Structure >()[0])> vector_element_t
Extract the type of a vector (or any indexable container) from its operator[].
Definition core_traits.h:134
auto max(const Vector &X)
Finds the maximum value inside a dataset.
Definition dataset.h:330
constexpr real MACH_EPSILON
Machine epsilon for the real type.
Definition constants.h:207
real nan()
Return a quiet NaN number in floating point representation.
Definition error.h:54
TH_CONSTEXPR int floor(real x)
Compute the floor of x. Computes the maximum integer number that is smaller than x.
Definition real_analysis.h:271
Statistical functions.