6#ifndef THEORETICA_IO_DATA_TABLE_H
7#define THEORETICA_IO_DATA_TABLE_H
11#include <unordered_map>
15#include "../core/constants.h"
16#include "../algebra/vec.h"
17#include "../algebra/mat.h"
30 std::vector<vec<real>> columns {};
33 std::vector<std::string> column_names {};
36 std::unordered_map<std::string, size_t> indices {};
51 column_names.emplace_back(
pair.first);
52 columns.emplace_back(
pair.second);
53 indices[
pair.first] = columns.size() - 1;
64 this->column_names = column_names;
67 for (
size_t i = 0; i < column_names.size(); ++i)
68 indices[column_names[i]] = i;
74 columns =
other.columns;
75 column_names =
other.column_names;
76 indices =
other.indices;
82 columns = std::move(
other.columns);
83 column_names = std::move(
other.column_names);
84 indices = std::move(
other.indices);
85 other.columns.clear();
86 other.column_names.clear();
87 other.indices.clear();
95 inline size_t rows()
const {
101 for (
const auto& col : columns)
112 return columns.size();
121 for (
const auto& col : columns)
138 column_names.clear();
145 inline std::vector<std::string>
header()
const {
154 return indices.find(name) != indices.end();
162 inline std::vector<vec<real>>&
data() {
170 inline const std::vector<vec<real>>&
data()
const {
182 auto it = indices.find(name);
183 if (
it == indices.end()) {
184 throw std::out_of_range(
"data_table::operator[]: column '" + name +
"' not found");
187 return columns[
it->second];
197 auto it = indices.find(name);
198 if (
it == indices.end()) {
199 throw std::out_of_range(
"data_table::operator[]: column '" + name +
"' not found");
202 return columns[
it->second];
212 auto it = indices.find(name);
213 if (
it == indices.end()) {
214 throw std::out_of_range(
"data_table::at: column '" + name +
"' not found");
217 return columns[
it->second];
247 if (
it != indices.end()) {
249 result.columns.emplace_back(columns[
it->second]);
266 auto it = indices.find(col);
267 if (
it == indices.end()) {
268 throw std::out_of_range(
"data_table::at: column '" + col +
"' not found");
273 throw std::out_of_range(
"data_table::at: row index " + std::to_string(
row) +
" out of range");
286 inline const real&
at(
const std::string& col,
size_t row)
const {
288 auto it = indices.find(col);
289 if (
it == indices.end()) {
290 throw std::out_of_range(
"data_table::at: column '" + col +
"' not found");
295 throw std::out_of_range(
"data_table::at: row index " + std::to_string(
row) +
" out of range");
307 inline std::unordered_map<std::string, real>
row(
size_t idx)
const {
309 std::unordered_map<std::string, real>
result;
311 for (
size_t i = 0; i < columns.size(); ++i) {
312 const real value =
idx < columns[i].size() ? columns[i][
idx] :
nan();
313 result[column_names[i]] = value;
329 for (
size_t i = 0; i < columns.size(); ++i)
345 for (
size_t i = 0; i < columns.size(); ++i) {
352 result.column_names.emplace_back(column_names[i]);
354 result.indices[column_names[i]] =
result.columns.size() - 1;
369 for (
size_t i = 0; i < columns.size(); ++i) {
376 result.column_names.emplace_back(column_names[i]);
378 result.indices[column_names[i]] =
result.columns.size() - 1;
392 auto it = indices.find(name);
393 if (
it != indices.end()) {
394 columns[
it->second] =
data;
396 column_names.emplace_back(name);
397 columns.emplace_back(
data);
398 indices[name] = columns.size() - 1;
426 auto it = indices.find(name);
427 if (
it != indices.end()) {
429 size_t idx =
it->second;
430 columns.erase(columns.begin() +
idx);
431 column_names.erase(column_names.begin() +
idx);
435 for (
size_t i =
idx; i < column_names.size(); ++i)
436 indices[column_names[i]] = i;
450 for (
const auto& name :
names)
465 if (
it != indices.end()) {
467 size_t i =
it->second;
487 for (
size_t i = 0; i < columns.size(); ++i) {
490 result(
j, i) =
j < columns[i].size() ? columns[i][
j] :
nan();
509 for (
size_t i = 0; i <
m.cols(); ++i) {
513 for (
size_t j = 0;
j <
m.rows(); ++
j)
517 column_names.emplace_back(
col_name);
518 columns.emplace_back(
column);
519 indices[
col_name] = columns.size() - 1;
526#ifndef THEORETICA_NO_PRINT
531 std::stringstream
res;
534 for (
size_t i = 0; i < column_names.size(); ++i) {
539 res << std::setw(
max_width + 2) << column_names[i] <<
"\t";
545 size_t num_rows = columns.empty() ? 0 : columns[0].size();
550 for (
size_t j = 0;
j < columns.size(); ++
j) {
552 if (i < columns[
j].
size()) {
571 inline operator std::string() {
578 return out <<
obj.to_string();
A data structure for holding labeled columns of data, where each column is a vector of real numbers.
Definition data_table.h:26
std::vector< std::string > header() const
Get a list of the column names in the data table.
Definition data_table.h:145
bool has_column(const std::string &name) const
Check whether the data table has a column with the given name.
Definition data_table.h:153
std::vector< vec< real > > & data()
Get the columns of the data table as a simple vector of column vectors, without the column names or indices.
Definition data_table.h:162
void clear()
Remove all columns from the data table, leaving it empty.
Definition data_table.h:136
data_table(const std::map< std::string, vec< real > > &table)
Construct from map of column vectors.
Definition data_table.h:48
mat< real > to_matrix() const
Convert the data table to a matrix, where each column of the matrix corresponds to a column in the data table.
Definition data_table.h:482
size_t size() const
Get the total number of elements in the data table.
Definition data_table.h:118
std::unordered_map< std::string, real > row(size_t idx) const
Get an entire row as a map of column names to values.
Definition data_table.h:307
data_table & insert(const std::string &name, const vec< real > &data)
Insert a new column into the data table with the given name and data.
Definition data_table.h:390
data_table & rename(const std::string &old_name, const std::string &new_name)
Rename a column in the data table from old_name to new_name.
Definition data_table.h:462
const real & at(const std::string &col, size_t row) const
Access an element by row index and column name, returning a const reference to the value.
Definition data_table.h:286
data_table & insert(const std::string &name, size_t num_rows, real value=0.0)
Insert a new column into the data table with the given name, number of rows, and constant value.
Definition data_table.h:411
const std::vector< vec< real > > & data() const
Get the columns of the data table as a simple vector of column vectors, without the column names or indices.
Definition data_table.h:170
data_table head(size_t n=5) const
Get the first n rows of the data table as a new data table.
Definition data_table.h:341
vec< real > & operator[](const std::string &name)
Access a column by name, returning a reference to the column vector.
Definition data_table.h:180
data_table & drop_columns(const std::vector< std::string > &names)
Drop multiple columns from the data table by name.
Definition data_table.h:448
const vec< real > & operator[](const std::string &name) const
Access a column by name, returning a const reference to the column vector.
Definition data_table.h:195
friend std::ostream & operator<<(std::ostream &out, const data_table &obj)
Stream the data table in string representation to an output stream (std::ostream).
Definition data_table.h:577
data_table(data_table &&other) noexcept
Move constructor from another data table, leaving the other empty.
Definition data_table.h:80
size_t rows() const
Get the (maximum) number of rows in the data table.
Definition data_table.h:95
size_t cols() const
Get the number of columns in the data table.
Definition data_table.h:111
real & at(const std::string &col, size_t row)
Access an element by row index and column name, returning a reference to the value.
Definition data_table.h:264
data_table()
Default constructor.
Definition data_table.h:41
vec< real > & at(const std::string &name)
Access a column by name, returning a reference to the column vector.
Definition data_table.h:210
vec< real > row_vec(size_t idx) const
Get an entire row as a vector of values, in the same order as the columns in the data table.
Definition data_table.h:325
data_table select(const std::vector< std::string > &cols) const
Select a subset of columns from the table, returning a new table containing only the selected columns.
Definition data_table.h:240
std::string to_string(unsigned int max_rows=8, unsigned int precision=6, unsigned int max_width=12) const
Convert the data table to string representation.
Definition data_table.h:529
data_table & from_matrix(const mat< real > &m, const std::vector< std::string > &col_names)
Convert a matrix to a data table, where each column of the matrix corresponds to a column in the data table.
Definition data_table.h:505
bool empty() const
Check whether the data table is empty.
Definition data_table.h:130
data_table(const data_table &other)
Copy constructor from another data table.
Definition data_table.h:73
data_table & drop_column(const std::string &name)
Drop a column from the data table by name.
Definition data_table.h:424
data_table tail(size_t n=5) const
Get the last n rows of the data table as a new data table.
Definition data_table.h:365
vec< real > & operator[](size_t idx)
Access a column by index, returning a reference to the column vector.
Definition data_table.h:226
data_table(size_t num_rows, const std::vector< std::string > &column_names)
Construct a data table with preallocated size.
Definition data_table.h:62
A generic matrix with a fixed number of rows and columns.
Definition mat.h:136
A statically allocated N-dimensional vector with elements of the given type.
Definition vec.h:92
Main namespace of the library which contains all functions and objects.
Definition algebra.h:27
double real
A real number, defined as a floating point type.
Definition constants.h:207
auto min(const Vector &X)
Finds the minimum value inside a dataset.
Definition dataset.h:351
auto sum(const Vector &X)
Compute the sum of a vector of real values using pairwise summation to reduce round-off error.
Definition dataset.h:219
TH_CONSTEXPR Type make_error()
Create a number representing an error state, constructed from a NaN value.
Definition real_analysis.h:1322
TH_CONSTEXPR real nan()
Return a quiet NaN number in floating point representation.
Definition error.h:78