Theoretica
Mathematical Library
Loading...
Searching...
No Matches
data_table.h
Go to the documentation of this file.
1
5
6#ifndef THEORETICA_IO_DATA_TABLE_H
7#define THEORETICA_IO_DATA_TABLE_H
8
#include <map>
#include <vector>
#include <unordered_map>
#include <string>
#include <iomanip>
#include <sstream>
#include <stdexcept>
#include <utility>

#include "../core/constants.h"
#include "../algebra/vec.h"
#include "../algebra/mat.h"
18
19
20namespace theoretica {
21
22
26 class data_table {
27 private:
28
29 // Vector of data columns.
30 std::vector<vec<real>> columns {};
31
32 // Ordered list of column names
33 std::vector<std::string> column_names {};
34
35 // Map of column names to their indices in the data table
36 std::unordered_map<std::string, size_t> indices {};
37
38 public:
39
42
43
48 data_table(const std::map<std::string, vec<real>>& table) {
49
50 for (const auto& pair : table) {
51 column_names.emplace_back(pair.first);
52 columns.emplace_back(pair.second);
53 indices[pair.first] = columns.size() - 1;
54 }
55 }
56
57
62 data_table(size_t num_rows, const std::vector<std::string>& column_names) {
63
64 this->column_names = column_names;
65 this->columns.resize(column_names.size(), vec<real>(num_rows));
66
67 for (size_t i = 0; i < column_names.size(); ++i)
68 indices[column_names[i]] = i;
69 }
70
71
74 columns = other.columns;
75 column_names = other.column_names;
76 indices = other.indices;
77 }
78
81
82 columns = std::move(other.columns);
83 column_names = std::move(other.column_names);
84 indices = std::move(other.indices);
85 other.columns.clear();
86 other.column_names.clear();
87 other.indices.clear();
88 }
89
90
95 inline size_t rows() const {
96
97 if (columns.empty())
98 return 0;
99
100 unsigned int max_rows = 0;
101 for (const auto& col : columns)
102 if (col.size() > max_rows)
103 max_rows = col.size();
104
105 return max_rows;
106 }
107
108
111 inline size_t cols() const {
112 return columns.size();
113 }
114
115
118 inline size_t size() const {
119
120 size_t sum = 0;
121 for (const auto& col : columns)
122 sum += col.size();
123
124 return sum;
125 }
126
127
130 inline bool empty() const {
131 return size() == 0;
132 }
133
134
136 inline void clear() {
137 columns.clear();
138 column_names.clear();
139 indices.clear();
140 }
141
142
145 inline std::vector<std::string> header() const {
146 return column_names;
147 }
148
149
153 inline bool has_column(const std::string& name) const {
154 return indices.find(name) != indices.end();
155 }
156
157
162 inline std::vector<vec<real>>& data() {
163 return columns;
164 }
165
166
170 inline const std::vector<vec<real>>& data() const {
171 return columns;
172 }
173
174
180 inline vec<real>& operator[](const std::string& name) {
181
182 auto it = indices.find(name);
183 if (it == indices.end()) {
184 throw std::out_of_range("data_table::operator[]: column '" + name + "' not found");
185 }
186
187 return columns[it->second];
188 }
189
190
195 inline const vec<real>& operator[](const std::string& name) const {
196
197 auto it = indices.find(name);
198 if (it == indices.end()) {
199 throw std::out_of_range("data_table::operator[]: column '" + name + "' not found");
200 }
201
202 return columns[it->second];
203 }
204
205
210 inline vec<real>& at(const std::string& name) {
211
212 auto it = indices.find(name);
213 if (it == indices.end()) {
214 throw std::out_of_range("data_table::at: column '" + name + "' not found");
215 }
216
217 return columns[it->second];
218 }
219
220
226 inline vec<real>& operator[](size_t idx) {
227 return columns[idx];
228 }
229
230
240 inline data_table select(const std::vector<std::string>& cols) const {
241
243
244 for (const auto& col_name : cols) {
245
246 auto it = indices.find(col_name);
247 if (it != indices.end()) {
248 result.column_names.emplace_back(col_name);
249 result.columns.emplace_back(columns[it->second]);
250 result.indices[col_name] = result.columns.size() - 1;
251 }
252 }
253
254 return result;
255 }
256
257
264 inline real& at(const std::string& col, size_t row) {
265
266 auto it = indices.find(col);
267 if (it == indices.end()) {
268 throw std::out_of_range("data_table::at: column '" + col + "' not found");
269 }
270
271 vec<real>& column_vec = columns[it->second];
272 if (row >= column_vec.size()) {
273 throw std::out_of_range("data_table::at: row index " + std::to_string(row) + " out of range");
274 }
275
276 return column_vec[row];
277 }
278
279
286 inline const real& at(const std::string& col, size_t row) const {
287
288 auto it = indices.find(col);
289 if (it == indices.end()) {
290 throw std::out_of_range("data_table::at: column '" + col + "' not found");
291 }
292
293 const vec<real>& column_vec = columns[it->second];
294 if (row >= column_vec.size()) {
295 throw std::out_of_range("data_table::at: row index " + std::to_string(row) + " out of range");
296 }
297
298 return column_vec[row];
299 }
300
301
307 inline std::unordered_map<std::string, real> row(size_t idx) const {
308
309 std::unordered_map<std::string, real> result;
310
311 for (size_t i = 0; i < columns.size(); ++i) {
312 const real value = idx < columns[i].size() ? columns[i][idx] : nan();
313 result[column_names[i]] = value;
314 }
315
316 return result;
317 }
318
319
325 inline vec<real> row_vec(size_t idx) const {
326
327 vec<real> result(columns.size());
328
329 for (size_t i = 0; i < columns.size(); ++i)
330 result[i] = idx < columns[i].size() ? columns[i][idx] : nan();
331
332 return result;
333 }
334
335
341 inline data_table head(size_t n = 5) const {
342
344
345 for (size_t i = 0; i < columns.size(); ++i) {
346
347 vec<real> column_head (min(n, columns[i].size()));
348
349 for (size_t j = 0; j < column_head.size(); ++j)
350 column_head[j] = columns[i][j];
351
352 result.column_names.emplace_back(column_names[i]);
353 result.columns.emplace_back(column_head);
354 result.indices[column_names[i]] = result.columns.size() - 1;
355 }
356
357 return result;
358 }
359
360
365 inline data_table tail(size_t n = 5) const {
366
368
369 for (size_t i = 0; i < columns.size(); ++i) {
370
371 vec<real> column_tail(min(n, columns[i].size()));
372
373 for (size_t j = 0; j < column_tail.size(); ++j)
374 column_tail[j] = columns[i][columns[i].size() - column_tail.size() + j];
375
376 result.column_names.emplace_back(column_names[i]);
377 result.columns.emplace_back(column_tail);
378 result.indices[column_names[i]] = result.columns.size() - 1;
379 }
380
381 return result;
382 }
383
384
390 inline data_table& insert(const std::string& name, const vec<real>& data) {
391
392 auto it = indices.find(name);
393 if (it != indices.end()) {
394 columns[it->second] = data;
395 } else {
396 column_names.emplace_back(name);
397 columns.emplace_back(data);
398 indices[name] = columns.size() - 1;
399 }
400
401 return *this;
402 }
403
404
411 inline data_table& insert(const std::string& name, size_t num_rows, real value = 0.0) {
412
413 vec<real> data (num_rows, value);
414 insert(name, data);
415
416 return *this;
417 }
418
419
424 inline data_table& drop_column(const std::string& name) {
425
426 auto it = indices.find(name);
427 if (it != indices.end()) {
428
429 size_t idx = it->second;
430 columns.erase(columns.begin() + idx);
431 column_names.erase(column_names.begin() + idx);
432 indices.erase(it);
433
434 // Update indices of remaining columns
435 for (size_t i = idx; i < column_names.size(); ++i)
436 indices[column_names[i]] = i;
437 }
438
439 return *this;
440 }
441
442
448 inline data_table& drop_columns(const std::vector<std::string>& names) {
449
450 for (const auto& name : names)
451 drop_column(name);
452
453 return *this;
454 }
455
456
462 inline data_table& rename(const std::string& old_name, const std::string& new_name) {
463
464 auto it = indices.find(old_name);
465 if (it != indices.end()) {
466
467 size_t i = it->second;
468 column_names[i] = new_name;
469 indices.erase(it);
470 indices[new_name] = i;
471 }
472
473 return *this;
474 }
475
476
482 inline mat<real> to_matrix() const {
483
484 const size_t row_size = rows();
485 mat<real> result (row_size, columns.size());
486
487 for (size_t i = 0; i < columns.size(); ++i) {
488
489 for (size_t j = 0; j < row_size; ++j)
490 result(j, i) = j < columns[i].size() ? columns[i][j] : nan();
491 }
492
493 return result;
494 }
495
496
505 inline data_table& from_matrix(const mat<real>& m, const std::vector<std::string>& col_names) {
506
507 clear();
508
509 for (size_t i = 0; i < m.cols(); ++i) {
510
511 vec<real> column (m.rows());
512
513 for (size_t j = 0; j < m.rows(); ++j)
514 column[j] = m(j, i);
515
516 std::string col_name = i < col_names.size() ? col_names[i] : "col_" + std::to_string(i);
517 column_names.emplace_back(col_name);
518 columns.emplace_back(column);
519 indices[col_name] = columns.size() - 1;
520 }
521
522 return *this;
523 }
524
525
526#ifndef THEORETICA_NO_PRINT
527
529 inline std::string to_string(unsigned int max_rows = 8, unsigned int precision = 6, unsigned int max_width = 12) const {
530
531 std::stringstream res;
532
533 // Print column names
534 for (size_t i = 0; i < column_names.size(); ++i) {
535
536 if (column_names[i].size() > max_width)
537 res << std::setw(max_width + 2) << column_names[i].substr(0, max_width - 3) + "...";
538 else
539 res << std::setw(max_width + 2) << column_names[i] << "\t";
540 }
541
542 res << "\n";
543
544 // Print data rows
545 size_t num_rows = columns.empty() ? 0 : columns[0].size();
547
548 for (size_t i = 0; i < rows_to_print; ++i) {
549
550 for (size_t j = 0; j < columns.size(); ++j) {
551
552 if (i < columns[j].size()) {
553 res << std::setw(max_width + 2) << std::setprecision(precision) << columns[j][i] << "\t";
554 } else {
555 res << std::setw(max_width + 2) << "\t";
556 }
557 }
558
559 res << "\n";
560 }
561
562 if (num_rows > max_rows) {
563 res << "... " << (num_rows - max_rows) << " more rows\n";
564 }
565
566 return res.str();
567 }
568
569
571 inline operator std::string() {
572 return to_string();
573 }
574
575
577 inline friend std::ostream& operator<<(std::ostream& out, const data_table& obj) {
578 return out << obj.to_string();
579 }
580
581#endif
582
583 };
584
585}
586
587
588#endif
A data structure for holding labeled columns of data, where each column is a vector of real numbers.
Definition data_table.h:26
std::vector< std::string > header() const
Get a list of the column names in the data table.
Definition data_table.h:145
bool has_column(const std::string &name) const
Check whether the data table has a column with the given name.
Definition data_table.h:153
std::vector< vec< real > > & data()
Get the columns of the data table as a simple vector of column vectors, without the column names or i...
Definition data_table.h:162
void clear()
Remove all columns from the data table, leaving it empty.
Definition data_table.h:136
data_table(const std::map< std::string, vec< real > > &table)
Construct from map of column vectors.
Definition data_table.h:48
mat< real > to_matrix() const
Convert the data table to a matrix, where each column of the matrix corresponds to a column in the da...
Definition data_table.h:482
size_t size() const
Get the total number of elements in the data table, i.e. the sum of the sizes of all columns.
Definition data_table.h:118
std::unordered_map< std::string, real > row(size_t idx) const
Get an entire row as a map of column names to values.
Definition data_table.h:307
data_table & insert(const std::string &name, const vec< real > &data)
Insert a new column into the data table with the given name and data.
Definition data_table.h:390
data_table & rename(const std::string &old_name, const std::string &new_name)
Rename a column in the data table from old_name to new_name.
Definition data_table.h:462
const real & at(const std::string &col, size_t row) const
Access an element by row index and column name, returning a const reference to the value.
Definition data_table.h:286
data_table & insert(const std::string &name, size_t num_rows, real value=0.0)
Insert a new column into the data table with the given name, number of rows, and constant value.
Definition data_table.h:411
const std::vector< vec< real > > & data() const
Get the columns of the data table as a simple vector of column vectors, without the column names or i...
Definition data_table.h:170
data_table head(size_t n=5) const
Get the first n rows of the data table as a new data table.
Definition data_table.h:341
vec< real > & operator[](const std::string &name)
Access a column by name, returning a reference to the column vector.
Definition data_table.h:180
data_table & drop_columns(const std::vector< std::string > &names)
Drop multiple columns from the data table by name.
Definition data_table.h:448
const vec< real > & operator[](const std::string &name) const
Access a column by name, returning a const reference to the column vector.
Definition data_table.h:195
friend std::ostream & operator<<(std::ostream &out, const data_table &obj)
Stream the data table in string representation to an output stream (std::ostream)
Definition data_table.h:577
data_table(data_table &&other) noexcept
Move constructor from another data table, leaving the other empty.
Definition data_table.h:80
size_t rows() const
Get the (maximum) number of rows in the data table.
Definition data_table.h:95
size_t cols() const
Get the number of columns in the data table.
Definition data_table.h:111
real & at(const std::string &col, size_t row)
Access an element by row index and column name, returning a reference to the value.
Definition data_table.h:264
data_table()
Default constructor.
Definition data_table.h:41
vec< real > & at(const std::string &name)
Access a column by name, returning a reference to the column vector.
Definition data_table.h:210
vec< real > row_vec(size_t idx) const
Get an entire row as a vector of values, in the same order as the columns in the data table.
Definition data_table.h:325
data_table select(const std::vector< std::string > &cols) const
Select a subset of columns from the table, returning a new table containing only the selected columns...
Definition data_table.h:240
std::string to_string(unsigned int max_rows=8, unsigned int precision=6, unsigned int max_width=12) const
Convert the data table to string representation.
Definition data_table.h:529
data_table & from_matrix(const mat< real > &m, const std::vector< std::string > &col_names)
Convert a matrix to a data table, where each column of the matrix corresponds to a column in the data...
Definition data_table.h:505
bool empty() const
Check whether the data table is empty (i.e. it contains no elements in any column).
Definition data_table.h:130
data_table(const data_table &other)
Copy constructor from another data table.
Definition data_table.h:73
data_table & drop_column(const std::string &name)
Drop a column from the data table by name.
Definition data_table.h:424
data_table tail(size_t n=5) const
Get the last n rows of the data table as a new data table.
Definition data_table.h:365
vec< real > & operator[](size_t idx)
Access a column by index, returning a reference to the column vector.
Definition data_table.h:226
data_table(size_t num_rows, const std::vector< std::string > &column_names)
Construct a data table with preallocated size.
Definition data_table.h:62
A generic matrix with a fixed number of rows and columns.
Definition mat.h:136
A statically allocated N-dimensional vector with elements of the given type.
Definition vec.h:92
Main namespace of the library which contains all functions and objects.
Definition algebra.h:27
double real
A real number, defined as a floating point type.
Definition constants.h:207
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition dataset.h:351
auto sum(const Vector &X)
Compute the sum of a vector of real values using pairwise summation to reduce round-off error.
Definition dataset.h:219
TH_CONSTEXPR Type make_error()
Create a number representing an error state, constructed from a NaN value.
Definition real_analysis.h:1322
TH_CONSTEXPR real nan()
Return a quiet NaN number in floating point representation.
Definition error.h:78