I am loading a 10GB file into memory and I find that even if I strip away any extra overhead and store the data in nothing but an array it still takes up 53 GB of ram. This seems crazy to me since I am converting some of the text data to longs which take up less room and convert the rest to char * which should take up the same amount of room as a text file. I have about 150M rows of data in the file I am trying to load. Is there any reason why this should take up so much ram when I load it the way I do below?
There are three files here a fileLoader class and its header file and a main that simply runs them. To answer some questions: OS is UBUNTU 12.04 64bit This is on a machien with 64GB of RAM and an SSD hd that I have providing 64GB of swap space for RAM I am loading all of the data at once becuase of the need for speed. It is critical for the application. All sorting, indexing, and lots of the data intensive work runs on the GPU. The other reason is that loading all of the data at once made it much simpler for me to write the code. I dont have to worry about indexed files, and mappings to locations in another file for example.
Here is the header file:
#ifndef FILELOADER_H_
#define FILELOADER_H_
#include <iostream>
#include <fstream>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <string>
class fileLoader {
public:
fileLoader();
virtual ~fileLoader();
void loadFile();
private:
long long ** longs;
char *** chars;
long count;
long countLines(std::string inFile);
};
#endif /* FILELOADER_H_ */
Here is the CPP file
#include "fileLoader.h"
fileLoader::fileLoader() {
// TODO Auto-generated constructor stub
this->longs = NULL;
this->chars = NULL;
}
char ** split(char * line,const char * delim,int size){
char ** val = new char * [size];
int i = 0;
bool parse = true;
char * curVal = strsep(&line,delim);
while(parse){
if(curVal != NULL){
val[i] = curVal;
i++;
curVal = strsep(&line,delim);
}else{
parse = false;
}
}
return val;
}
void fileLoader::loadFile(){
const char * fileName = "/blazing/final/tasteslikevictory";
std::string fileString(fileName);
//-1 since theres a header row and we are skipinig it
this->count = countLines(fileString) -1;
this->longs = new long long*[this->count];
this->chars = new char **[this->count];
std::ifstream inFile;
inFile.open(fileName);
if(inFile.is_open()){
std::string line;
int i =0;
getline(inFile,line);
while(getline(inFile,line)){
this->longs[i] = new long long[6];
this->chars[i] = new char *[7];
char * copy = strdup(line.c_str());
char ** splitValues = split(copy,"|",13);
this->longs[i][0] = atoll(splitValues[4]);
this->longs[i][1] = atoll(splitValues[5]);
this->longs[i][2] = atoll(splitValues[6]);
this->longs[i][3] = atoll(splitValues[7]);
this->longs[i][4] = atoll(splitValues[11]);
this->longs[i][5] = atoll(splitValues[12]);
this->chars[i][0] = strdup(splitValues[0]);
this->chars[i][1] = strdup(splitValues[1]);
this->chars[i][2] = strdup(splitValues[2]);
this->chars[i][3] = strdup(splitValues[3]);
this->chars[i][4] = strdup(splitValues[8]);
this->chars[i][5] = strdup(splitValues[9]);
this->chars[i][6] = strdup(splitValues[10]);
i++;
delete[] splitValues;
free(copy);
}
}
}
fileLoader::~fileLoader() {
// TODO Auto-generated destructor stub
if(this->longs != NULL){
delete[] this->longs;
}
if(this->chars != NULL){
for(int i =0; i <this->count;i++ ){
free(this->chars[i]);
}
delete[] this->chars;
}
}
long fileLoader::countLines(std::string inFile){
int BUFFER_SIZE = 16*1024;
int fd = open(inFile.c_str(), O_RDONLY);
if(fd == -1)
return 0;
/* Advise the kernel of our access pattern. */
posix_fadvise(fd, 0, 0, 1); // FDADVICE_SEQUENTIAL
char buf[BUFFER_SIZE + 1];
long lines = 0;
while(size_t bytes_read = read(fd, buf, BUFFER_SIZE))
{
if(bytes_read == (size_t)-1)
return 0;
if (!bytes_read)
break;
for(char *p = buf; (p = (char*) memchr(p, '\n', (buf + bytes_read) - p)); ++p)
++lines;
}
return lines;
}
Here is the file with my main function:
#include "fileLoader.h"
int main()
{
fileLoader loader;
loader.loadFile();
return 0;
}
Here is an example of the data that I am loading:
13|0|1|1997|113|1|4|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
14|0|1|1997|113|1|5|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
15|0|1|1997|113|1|6|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
16|0|1|1997|113|1|7|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
17|0|1|1997|113|1|8|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
18|0|1|1997|113|1|9|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
19|0|1|1997|113|1|10|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
20|0|1|1997|113|1|11|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
21|0|1|1997|113|1|12|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
9|0|1|1997|113|1|13|12408012|C9FF921CA04ADA3D606BF6DAC4A0B092|SEMANAL|66C5E828DC69F857ADE060B8062C923E|113|1
27|0|1|1992|125|1|1|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
28|0|1|1992|125|1|2|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
29|0|1|1992|125|1|3|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
30|0|1|1992|125|1|4|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
31|0|1|1992|125|1|5|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
32|0|1|1992|125|1|6|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
33|0|1|1992|125|1|7|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
34|0|1|1992|125|1|8|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
35|0|1|1992|125|1|9|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
36|0|1|1992|125|1|10|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
37|0|1|1992|125|1|11|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
38|0|1|1992|125|1|12|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
39|0|1|1992|125|1|13|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
40|0|1|1992|125|1|14|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
41|0|1|1992|125|1|15|10183|9EF534D2CF74B24AC28CBD9BE937A412|SEMANAL|375CCE505F5353CCDE85D4E84A9888D8|125|1
10|0|1|1996|126|1|1||||||