16

I need to read all blocks of one large file (about 10 GB) sequentially. The file contains many floats and a few strings, like this (items are separated by '\n'): 6.292611 -1.078219E-266 -2.305673E+065 sod;eiwo 4.899747e-237 1.673940e+089 -4.515213

I read MAX_NUM_PER_FILE items at a time, process them, and write them to another file, but I don't know how to tell when the ifstream has reached its end. Here is my code:

// Question's original (misbehaving) attempt: split the input into files
// out1.txt, out2.txt, ..., each holding one MAX_NUM_PER_FILE-character chunk
// plus the characters up to the next '\n'.
ifstream file_input(path_input);  // The file is a text file; both text and binary mode were tried, and both failed.
if(file_input)
{
    // Seek to the end, record the position reported there, then rewind.
    file_input.seekg(0,file_input.end);
    unsigned long long length = file_input.tellg();    // intended as the file size
    file_input.seekg(0,file_input.beg);

    // One chunk plus MAX_NUM_PER_LINE extra characters for the tail of a split item.
    char * buffer = new char [MAX_NUM_PER_FILE+MAX_NUM_PER_LINE];
    int i=1,j;
    // NOTE(review): tmp[3] has room for two digits plus the terminator only.
    char c,tmp[3];
    while(file_input.tellg()<length)
    {
        file_input.read(buffer,MAX_NUM_PER_FILE);
        // j is set to the requested count, not to what read() actually stored.
        // After a short final read, the rest of the buffer still holds the
        // previous chunk's characters, and they are written out below.
        j=MAX_NUM_PER_FILE;
        // Append characters up to (not including) the next '\n'.
        // NOTE(review): j is not checked against the buffer size here.
        while(file_input.get(c)&&c!='\n')
            buffer[j++]=c;   // complete the item that the chunk boundary cut

        // process the buffer contents here...

        itoa(i++,tmp,10);    // chunk number -> decimal string
        string out_name="out"+string(tmp)+".txt";
        ofstream file_output(out_name);
        file_output.write(buffer,j);
        file_output.close();
    }

    file_input.close();
    delete[] buffer;
}

My code goes wrong: length is bigger than the real file size. I have tried file_input.good() and !file_input.eof(), but they didn't work. getline(file_input,s) works, but it is much slower than read. I want to use read, but I don't know how to check whether the ifstream is at end-of-file.

I do my work in WINDOWS 7 with VS2010.

I have searched, but I could not find an answer. The question "How to open a file using ifstream and keep reading it until the end" does not answer my question.


Update, Problem solved

Hi everyone, I have figured out that it was my fault. Both while(file_input.tellg()<length) and while(file_input.peek()!=EOF) work fine! while(file_input.peek()!=EOF) is recommended.

The extra items written after the end of the file were stale items left in the buffer from the previous iteration.

Here is the correct code:

// Splits the text file at path_input into consecutive files out1.txt,
// out2.txt, ...  Each output file receives one chunk of up to
// MAX_NUM_PER_FILE characters, followed by the characters up to (but not
// including) the next '\n', so that no item is cut in half.
ifstream file_input(path_input);
if(file_input)
{
    // No up-front size probe (seekg/tellg) is needed: peek() reports EOF directly.
    const streamsize capacity = MAX_NUM_PER_FILE+MAX_NUM_PER_LINE;
    char * buffer = new char [capacity];
    int i=1;
    char c,tmp[12];   // room for any 32-bit int in decimal plus the terminator
    while(file_input.peek()!=EOF)
    {
        file_input.read(buffer,MAX_NUM_PER_FILE);
        // read() stores fewer characters than requested on the final chunk.
        // gcount() is the number actually stored, so the buffer never has to
        // be cleared and stale data from the previous chunk is never written.
        streamsize j=file_input.gcount();

        // Complete the item the chunk boundary cut through. After a short
        // read the stream is already in a failed state, so get() does nothing.
        // The capacity test comes first so that a character is never
        // extracted without room to store it.
        while(j<capacity&&file_input.get(c)&&c!='\n')
            buffer[j++]=c;

        itoa(i++,tmp,10);   // chunk number -> decimal string
        string out_name="out"+string(tmp)+".txt";
        ofstream file_output(out_name);
        // Write exactly the characters collected. Unlike strlen(), this is
        // correct even when the data contains NUL bytes or fills the buffer.
        file_output.write(buffer,j);
        file_output.close();
    }

    file_input.close();
    delete[] buffer;
}
Community
  • 1
  • 1
user1024
  • 982
  • 4
  • 13
  • 26
  • 2
    You shouldn't be checking EOF *before* reading. Attempt to read, and check if it succeeded. – The Paramagnetic Croissant Jan 23 '15 at 07:57
  • @TheParamagneticCroissant You mean `while(file_input.read(buffer,MAX_NUM_PER_FILE))`? I tried this, but it can't read the whole file; a few items aren't read. – user1024 Jan 23 '15 at 09:24
  • @MSalters I don't think the linked question actually answers this one. – Pixelchemist Jan 23 '15 at 09:35
  • Seems I picked the wrong Q from the list of possible duplicates, which TBH is a rather big list. See e.g. http://stackoverflow.com/questions/5605125/why-is-iostreameof-inside-a-loop-condition-considered-wrong?lq=1 (highest voted Q&A on `.eof()`) – MSalters Jan 23 '15 at 09:55
  • Another indicator of a failed STL design, as if asking for size or end of stream was too extraordinary. – Sam Ginrich Jan 02 '22 at 09:57

1 Answers1

30
// Keep looping while a next character exists: peek() returns it without
// extracting it, or the end-of-file value once the input is exhausted.
while( file_input.peek() != ifstream::traits_type::eof() )
{
    // code
}

Basically peek() will read the next char without extracting it.

So you can simply compare it to EOF.

deW1
  • 5,562
  • 10
  • 38
  • 54