I have data file of 36MB(each value in file is double type) residing on hard disk. My question is, when I read this file via c++ in RAM putting content in matrix (provided by boost library), does it going to occupy only 36MB of RAM or different? Am I running out of memory?
The reason is that I am on 64-bit ubuntu platform with 8 GB RAM and I am getting bad allocation error. The same file reading program works fine for small data files.
Below is snippet to load the (data real-sim )[https://www.csie.ntu.edu.tw/~cjlin/libsvmtools/datasets/binary.html] . x and y are boost matrix and vector respectively declared as extern
in .h file.
void load_data(const char* filename)
{
ifstream in(filename);
string line;
int line_num = 0;
if (in.is_open()) {
while (in.good()) {
getline(in, line);
if (line.empty()) continue;
int cat = 0;
if (!parse_line(line, cat, line_num)) {
cout << "parse line: " << line << ", failed.." << endl;
continue;
}
y(line_num) = cat;
line_num += 1;
}
in.close();
}
}
bool debug = false;
using namespace boost::numeric::ublas;
vector<double> y(no_records);
matrix<double> x(no_records,no_features);
using namespace std;
template < class T>
void convert_from_string(T& value, const string& s)
{
stringstream ss(s);
ss >> value;
}
int get_cat(const string& data) {
int c;
convert_from_string(c, data);
return c;
}
bool get_features(const string& data, int& index, double& value) {
int pos = data.find(":");
if (pos == -1) return false;
convert_from_string(index, data.substr(0, pos));
convert_from_string(value, data.substr(pos + 1));
return true;
}
bool parse_line(const string& line, int& cat, const int line_num) {
if (line.empty()) return false;
size_t start_pos = 0;
char space = ' ';
while (true) {
size_t pos = line.find(space, start_pos);
if ((int)pos != -1) {
string data = line.substr(start_pos, pos - start_pos);
if (!data.empty()) {
if (start_pos == 0) {
cat = get_cat(data);
}
else {
int index = -1;
double v = 0;
get_features(data, index, v);
if (debug)
cout << "index: " << index << "," << "value: " << v << endl;
if (index != -1) {
index -= 1; // index from 0
x(line_num, index) = v;
}
}
}
start_pos = pos + 1;
}
else {
string data = line.substr(start_pos, pos - start_pos);
if (!data.empty()) {
cout << "read data: " << data << endl;
int index = -1;
double v = 0;
get_features(data, index, v);
if (debug)
cout << "index: " << index << "," << "value: " << v << endl;
if (index != -1) {
index -= 1; // index from 0
x(line_num, index) = v;
}
}
break;
}
}
return true;
}