Here is one possible solution
And I hate it. I would never do something like that, because the design idea or the requirement is already nonsense.
Either we use types and know which column has what type, or we simply use a one-size-fits-all type for the required context. In this case, simply a std::string.
But doing this dynamically will result in really ugly and unmaintainable code.
The solution here is std::any. But maybe a class hierarchy would be even better. I will try later.
Please see this ugly code:
#include <iostream>
#include <sstream>
#include <vector>
#include <regex>
#include <string>
#include <iterator>
#include <algorithm>
#include <utility>
#include <any>
#include <map>
#include <tuple>
// Column separator of the CSV input, expressed as a regular expression.
// ECMAScript is the grammar std::regex uses by default; it is only spelled out here.
const std::regex re{ ",", std::regex::ECMAScript };
// A single line of the csv input, already split into its cells
struct DataRow {
    // The cells of the line, in the order they appeared
    std::vector<std::string> columns{};

    // Extractor: reads one complete line from the stream and splits it at every
    // match of the delimiter regex "re". If no line can be read, "columns" is
    // left untouched and the stream is returned in its failed state.
    friend std::istream& operator >> (std::istream& is, DataRow& dr) {
        std::string text{};
        if (!std::getline(is, text)) {
            return is;
        }
        // Submatch index -1 selects the text between the delimiter matches
        const std::sregex_token_iterator firstCell(text.begin(), text.end(), re, -1);
        const std::sregex_token_iterator noMoreCells{};
        dr.columns.assign(firstCell, noMoreCells);
        return is;
    }
};
struct CSV {
protected:
// Conversion functions
std::any stringToAnySTRING(const std::string& s) { return s; }
std::any stringToAnyBOOL(const std::string& s) { bool result{ false }; if (s == "TRUE") result = true; return result; }
std::any stringToAnyINT(const std::string& s) { int result = std::stoi(s); return result; }
std::any stringToAnyLONG(const std::string& s) { long result = std::stol(s); return result; }
// Making Reading easier
using ConvertToAny = std::any(CSV::*)(const std::string&);
// Map conversion functions to type strings
std::map<std::string, ConvertToAny> converter{
{"STRING", &CSV::stringToAnySTRING},
{"BOOL", &CSV::stringToAnyBOOL},
{"INT", &CSV::stringToAnyINT},
{"LONG", &CSV::stringToAnyLONG}
};
public:
// Header, Types and data as std::any
std::vector<std::string> header{};
std::vector<std::string> types{};
std::vector<std::vector<std::any>> data{};
// Extractor operator
friend std::istream& operator >> (std::istream& is, CSV& c) {
// Read header line
if (std::string line{}; std::getline(is, line)) {
// Split header line into sub strings
c.header.clear();
std::copy(std::sregex_token_iterator(line.begin(), line.end(), re, -1), {}, std::back_inserter(c.header));
// Read types line
if (std::getline(is, line)) {
// Spit types into sub strings
c.types.clear();
std::copy(std::sregex_token_iterator(line.begin(), line.end(), re, -1), {}, std::back_inserter(c.types));
// Read all data, so all lines, split them and convert them to the desired data type
c.data.clear();
// This will read all lines and split them into columns
std::vector<DataRow> drs(std::istream_iterator<DataRow>(is), {});
// Make at least one plausibility check, that all rows have the same number of columns
size_t minDataLength = std::min_element(drs.begin(), drs.end(), [](const DataRow& dr1, const DataRow& dr2)
{return dr1.columns.size() < dr2.columns.size(); })->columns.size();
if (c.header.size() == c.types.size() && c.types.size() == minDataLength) {
// Now convert all columns into the type denoted by the read type array and store them as any data
// Double transform because of 2 dimensional array
std::transform(drs.begin(), drs.end(), std::back_inserter(c.data), [&c](const DataRow& dr) {
std::vector<std::any> va{};
// This is the conversion into a type defined by the types array
// Anybody who understands this transfrom will get the Nobel price for Obfuscation
std::transform(dr.columns.begin(), dr.columns.end(), std::back_inserter(va),
[&c, i = 0U](const std::string& s) mutable {return (c.*(c.converter[c.types[i++]]))(s); });
return va; });
}
}
}
return is;
}
// Inserter operator
friend std::ostream& operator << (std::ostream& os, const CSV& c) {
// Write header
os << "Header: ";
std::copy(c.header.begin(), c.header.end(), std::ostream_iterator<std::string>(os, " "));
// And the type names
os << "\nTypes: ";
std::copy(c.types.begin(), c.types.end(), std::ostream_iterator<std::string>(os, " "));
os << "\n\nData:\n";
// And the types. Arrgh. How ugly
std::for_each(c.data.begin(), c.data.end(), [&c,&os](const std::vector<std::any>& va) {
for (size_t i = 0U; i < va.size(); ++i) {
if (c.types[i] == "INT") { int v = std::any_cast<int>(va[i]); os << v << " "; }
else if (c.types[i] == "LONG") { long v = std::any_cast<long>(va[i]); os << v << " "; }
else if (c.types[i] == "STRING") { std::string v = std::any_cast<std::string>(va[i]); os << v << " "; }
else if (c.types[i] == "BOOL") { bool v = std::any_cast<bool>(va[i]); os << v << " "; }
}
os << "\n";
});
return os;
}
};
// Demo input. It makes no difference whether the data comes from a file or
// from a string stream, both are read through the std::istream interface.
// First line: column names. Second line: column types. Then the data rows.
std::istringstream csvFile(R"csv(year,category,winner,entity
INT,STRING,BOOL,STRING
2015,CHEF OF THE YEAR,FALSE,John Doe
2015,CHEF OF THE YEAR,FALSE,Bob Brown
2015,CHEF OF THE YEAR,TRUE,William Thorton
2015,CHEF OF THE YEAR,FALSE,Jacob Smith)csv");
int main() {
// Define varaiable of type csv
CSV csv{};
// Read from somewhere
csvFile >> csv;
// Show some debug output
std::cout << csv;
return 0;
}