Theoretica
Scientific Computing
Loading...
Searching...
No Matches
histogram.h
Go to the documentation of this file.
1
5
6#ifndef THEORETICA_HISTOGRAM_H
7#define THEORETICA_HISTOGRAM_H
8
9#ifndef THEORETICA_NO_PRINT
10#include <sstream>
11#include <ostream>
12#endif
13
14#include <vector>
15#include "../core/real_analysis.h"
16#include "../core/dataset.h"
17#include "./statistics.h"
18
19
20namespace theoretica {
21
22
28 class histogram {
29 private:
30
32 size_t N {0};
33
35 std::vector<unsigned int> bin_counts {};
36
38 real range_max {nan()};
39
41 real range_min {nan()};
42
44 real value_max {nan()};
45
47 real value_min {nan()};
48
50 real run_average {0};
51
53 real run_tss {0};
54
55 public:
56
61 histogram() = default;
62
71 histogram(unsigned int bin_count, real range_min, real range_max)
72 : N(0), value_max(-inf()), value_min(-inf()), run_average(0), run_tss(0) {
73
74 bin_counts.resize(bin_count);
75 this->range_max = range_max;
76 this->range_min = range_min;
77 }
78
79
87 template<typename Dataset, enable_vector<Dataset> = true>
88 histogram(const Dataset& data, unsigned int bin_count = 0) {
89
90 range_max = theoretica::max(data);
91 range_min = theoretica::min(data);
92 value_max = range_max;
93 value_min = range_min;
94 N = data.size();
95
96 // Compute mean and TSS
97 run_average = stats::mean(data);
98 run_tss = stats::total_sum_squares(data);
99
100 // Default bin count is sqrt(N)
101 bin_counts.resize(
103 );
104
105 // The histogram contains all the data points by construction
106 for (size_t i = 0; i < N; ++i)
107 bin_counts[index(data[i])]++;
108 }
109
110
115 inline void insert(real x) {
116
117 if(x < range_min || x > range_max)
118 return;
119
120 // Update average and TSS using Welford's method
121 const real tmp = run_average;
122 run_average = tmp + (x - tmp) / (N + 1);
123 run_tss += (x - tmp) * (x - run_average);
124
125 value_max = value_max < x ? x : value_max;
126 value_min = value_min > x ? x : value_min;
127
128 bin_counts[index(x)]++;
129 N++;
130 }
131
132
141 inline unsigned int index(real x) const {
142
143 if(abs(x - range_max) < MACH_EPSILON)
144 return bin_counts.size() - 1;
145
146 return floor(
147 (x - range_min) / (range_max - range_min)
148 * bin_counts.size()
149 );
150 }
151
152
158 inline vec2 range() const {
159 return vec2({range_min, range_max});
160 }
161
162
163 // Statistical functions
164
165
170 inline unsigned int number() const {
171 return N;
172 }
173
174
181 inline std::vector<unsigned int> bins() const {
182 return bin_counts;
183 }
184
185
187 inline real range_lower() const {
188 return range_min;
189 }
190
191
193 inline real range_upper() const {
194 return range_max;
195 }
196
197
201 inline real max() const {
202 return value_max;
203 }
204
205
209 inline real min() const {
210 return value_min;
211 }
212
213
217 inline real mean() const {
218 return run_average;
219 }
220
221
226 inline real tss() const {
227 return run_tss;
228 }
229
230
233 inline void rebuild(
234 const std::vector<unsigned int>& bin_counts,
235 const vec2& range, size_t N, real run_average, real run_tss,
236 real value_min, real value_max) {
237
238 this->bin_counts = bin_counts;
239 this->range_min = range[0];
240 this->range_max = range[1];
241 this->N = N;
242 this->run_average = run_average;
243 this->run_tss = run_tss;
244 this->value_min = value_min;
245 this->value_max = value_max;
246 }
247
248
249 // Operators
250
251
257 inline real operator()(real x) {
258
259 if (x < range_min || x > range_max)
260 return 0.0;
261
262 return bin_counts[index(x)];
263 }
264
265
270 inline unsigned int operator[](unsigned int i) const {
271 return bin_counts[i];
272 }
273
274
275 // TO-DO Cumulative Distribution Function
276
277
278#ifndef THEORETICA_NO_PRINT
279
288 inline std::string to_string(
289 const std::string& separator = " ",
290 bool normalized = true,
291 bool lower_extreme = false) const {
292
293 if(N == 0)
294 return "";
295
296 std::stringstream res;
297 const real width = abs(range_max - range_min) / bin_counts.size();
298 real mult = 0.5;
299
300 if (lower_extreme)
301 mult = 0.0;
302
303 for (size_t i = 0; i < bin_counts.size(); ++i) {
304
305 res << (range_min + (i + mult) * width) << separator;
306
307 if (normalized)
308 res << (bin_counts[i] / (real) N) << std::endl;
309 else
310 res << bin_counts[i] << std::endl;
311 }
312
313 return res.str();
314 }
315
316
318 inline operator std::string() {
319 return to_string();
320 }
321
322
325 inline friend std::ostream& operator<<(
326 std::ostream& out, const histogram& obj) {
327 return out << obj.to_string();
328 }
329
330#endif
331
332 };
333
334
335 // Statistical functions over elements of a histogram
336
337
338 namespace stats {
339
340
342 inline real mean(const histogram& h) {
343 return h.mean();
344 }
345
346
348 inline real tss(const histogram& h) {
349 return h.tss();
350 }
351
352
354 inline real variance(const histogram& h) {
355
356 if (h.number() <= 1) {
357 TH_MATH_ERROR("variance", h.number(), MathError::DivByZero);
358 return nan();
359 }
360
361 return h.tss() / (h.number() - 1);
362 }
363
364
366 inline real stdev(const histogram& h) {
367 return sqrt(variance(h));
368 }
369 }
370
371
373 inline real max(const histogram& h) {
374 return h.max();
375 }
376
377
379 inline real min(const histogram& h) {
380 return h.min();
381 }
382
383}
384
385#endif
Histogram class with running statistics, can be constructed from the parameters of the bins or from a set of data points.
Definition histogram.h:28
real range_lower() const
Return the lower extreme of the histogram range.
Definition histogram.h:187
void rebuild(const std::vector< unsigned int > &bin_counts, const vec2 &range, size_t N, real run_average, real run_tss, real value_min, real value_max)
Rebuild the histogram from its parameters, including the bin counts and the running statistics (used ...
Definition histogram.h:233
histogram(unsigned int bin_count, real range_min, real range_max)
Construct the histogram from the number of bins and the range.
Definition histogram.h:71
real min() const
Get the smallest data point of the histogram.
Definition histogram.h:209
unsigned int index(real x) const
Find the bin index corresponding to a given data point.
Definition histogram.h:141
friend std::ostream & operator<<(std::ostream &out, const histogram &obj)
Stream the histogram in string representation to an output stream (std::ostream)
Definition histogram.h:325
real max() const
Get the biggest data point of the histogram.
Definition histogram.h:201
histogram()=default
Default constructor, creates an empty histogram with no bins and NaN range.
real operator()(real x)
Evaluate the histogram like a step function which is zero outside the range of the histogram.
Definition histogram.h:257
vec2 range() const
Get the histogram range as a vector of two elements, containing the lower and upper extremes.
Definition histogram.h:158
real tss() const
Get the total sum of squares (TSS) computed using Welford's one-pass method.
Definition histogram.h:226
real range_upper() const
Return the upper extreme of the histogram range.
Definition histogram.h:193
unsigned int operator[](unsigned int i) const
Get the number of elements in the i-th bin.
Definition histogram.h:270
std::string to_string(const std::string &separator=" ", bool normalized=true, bool lower_extreme=false) const
Convert the histogram to string representation.
Definition histogram.h:288
unsigned int number() const
Get the number of data points inside the histogram.
Definition histogram.h:170
histogram(const Dataset &data, unsigned int bin_count=0)
Construct the histogram from a set of data points, with the given number of bins.
Definition histogram.h:88
void insert(real x)
Insert a new data point inside the histogram, updating the running statistics and the corresponding bin count.
Definition histogram.h:115
real mean() const
Get the mean value of the histogram data.
Definition histogram.h:217
std::vector< unsigned int > bins() const
Get a vector containing the bin counts of each bin.
Definition histogram.h:181
A statically allocated N-dimensional vector with elements of the given type.
Definition vec.h:92
#define TH_MATH_ERROR(F_NAME, VALUE, EXCEPTION)
TH_MATH_ERROR is a macro which throws exceptions or modifies errno (depending on which compilation op...
Definition error.h:219
real stdev(const histogram &h)
Compute the standard deviation of the values of a histogram.
Definition histogram.h:366
real total_sum_squares(const Dataset &X)
Compute the total sum of squares (TSS) of a given dataset using Welford's one-pass method.
Definition statistics.h:116
real variance(const histogram &h)
Compute the variance of the values of a histogram.
Definition histogram.h:354
real mean(const histogram &h)
Compute the mean of the values of a histogram.
Definition histogram.h:342
real tss(const histogram &h)
Compute the total sum of squares of the values of the histogram.
Definition histogram.h:348
Main namespace of the library which contains all functions and objects.
Definition algebra.h:27
double real
A real number, defined as a floating point type.
Definition constants.h:207
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition dataset.h:347
dual2 sqrt(dual2 x)
Compute the square root of a second order dual number.
Definition dual2_functions.h:54
vec< real, 2 > vec2
A 2-dimensional vector with real elements.
Definition algebra_types.h:39
dual2 abs(dual2 x)
Compute the absolute value of a second order dual number.
Definition dual2_functions.h:242
Vector make_error()
Create a vector representing an error state, with all NaN values.
Definition algebra.h:103
auto max(const Vector &X)
Finds the maximum value inside a dataset.
Definition dataset.h:326
TH_CONSTEXPR real nan()
Return a quiet NaN number in floating point representation.
Definition error.h:74
@ DivByZero
Division by zero.
constexpr real MACH_EPSILON
Machine epsilon for the real type.
Definition constants.h:216
TH_CONSTEXPR real inf()
Get positive infinity in floating point representation.
Definition error.h:96
TH_CONSTEXPR int floor(real x)
Compute the floor of x, as the largest integer which is not greater than x.
Definition real_analysis.h:271
Statistical functions.