2
/* Utility function to read lines of unknown lengths */
char *readline(FILE* fp, int max_length)
{
    //The size is extended by the input with the value of the provisional
    char *str;
    int ch;
    int len = 0;
    int current_max = max_length;

    str = (char*)malloc(sizeof(char)*current_max);
    if(!str)
        return str;

    while((char)(ch = fgetc(fp))!='\n' && ch != EOF)
    {
        str[len++] = ch;
        if(len == current_max)
        {
            current_max = current_max + max_length;
            str = realloc(str, sizeof(char)*current_max);
            if(!str)
                return str;
        }
    }
    str[len] = '\0';

    return str;
}

I have the above code snippet to read a line of unknown length. I am able to read single line inputs, as expected from stdin, but while reading a file I am having trouble in determining the EOF of the file.

While reading from a file, I am reading it line by line in a loop, now I want to break off the loop once all lines have been read but I am not able to determine when to do so, hence the loop ends up executing forever. Please help me with determining the break condition.

char *line;

while(1)
{
    line = readline(fd, MAX_INPUT_LENGTH);

    /*IF all lines have been read then break off the loop, basically determine EOF ?*

    //text processing on the line

}
Barmar
  • 741,623
  • 53
  • 500
  • 612
bawejakunal
  • 1,678
  • 2
  • 25
  • 54
  • [Please see this discussion on why not to cast the return value of `malloc()` and family in `C`.](http://stackoverflow.com/q/605845/2173917). – Sourav Ghosh Sep 08 '16 at 21:52
  • 4
    So the problem is that you don't know how the function can signal to its caller that the end of the file has been reached? Consider returning NULL in the event that EOF is detected before any characters are successfully read. – John Bollinger Sep 08 '16 at 21:54
  • 1
    The cast in your `while` is unnecessary (`'\n'` is an `int`). I usually use `while ((ch = fgetc(fp)) != EOF && ch != '\n')` though either order works. – Jonathan Leffler Sep 08 '16 at 22:02
  • What is the point of `max_length`? It does not limit input to a "max". Better to make it a hard maximum OR let `readline()` manage growth itself. – chux - Reinstate Monica Sep 09 '16 at 01:16
  • @chux that is just for reusability purpose, in the actual function call I am passing a hard maximum only, defined as a macro at the beginning of source file. Sorry if this caused confusion, I cant post the complete source code here. – bawejakunal Sep 09 '16 at 01:23
  • IMO, your best bet is to change the api to `int readline(FILE *, char **)` and make `line` an "out" parameter. Since you are dynamically allocating memory, passing in MAX_INPUT_LENGTH seems unnecessary. Return 1 if you read a line, or maybe the number of characters read, or whatever is most useful for you, and return EOF when there's no data. – William Pursell Sep 09 '16 at 01:31

4 Answers4

0

Try:

while((ch = fgetc(fp))!=EOF)
{
    str[len++] = ch;
    if (ch=='\n')
        break;
}
str[len]= '\0';
return(str);

This separates EOL handling from EOF handling. After reading EOF, the next call to readline will return an empty string, the signal that EOF has been reached.

Paul Ogilvie
  • 25,048
  • 4
  • 23
  • 41
0

you should change the code this way:

    char *line;
    int  eofreached; // pay attention
    eofreached = 0;// pay attention
while(eofreached == 0)// pay attention
{
    line = readline(fd, MAX_INPUT_LENGTH, &eofreached);// pay attention

    /*IF all lines have been read then break off the loop, basically determine EOF ?*

    //text processing on the line

}

/* Utility function to read lines of unknown lengths */
char *readline(FILE* fp, int max_length, int* eof_found)// pay attention
{
    //The size is extended by the input with the value of the provisional
    char *str;
    int ch;
    int len = 0;
    int current_max = max_length;

    str = (char*)malloc(sizeof(char)*current_max);
    if(!str)
        return str;

    while((char)(ch = fgetc(fp))!='\n' && ch != EOF)
    {
        str[len++] = ch;
        if(len == current_max)
        {
            current_max = current_max + max_length;
            str = realloc(str, sizeof(char)*current_max);
            if(!str)
                return str;
        }
    }
    if ( ch == EOF )    // pay attention // this line and next line
        *eof_found = 1; // pay attention // can be improved: *eof_found = ch == EOF
    str[len] = '\0';

    return str;
}
Maxim Kitsenko
  • 2,042
  • 1
  • 20
  • 43
0

The best way to handle this is to have readline return NULL for EOF [or error]. But, you also have to account for blank lines.

I've changed and annotated your code to something that I believe will work. The max_length wasn't as useful because of the changes to how the realloc was done [please pardon the gratuitous style cleanup]:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

/* Utility function to read lines of unknown lengths */
// RETURNS: string pointer (NULL means EOF or realloc failure)
char *
readline(FILE *fp,int max_length)
{
    // The size is extended by the input with the value of the provisional
    char *str;
    char *tmp;
    int ch;
    int len = 0;
    int current_max = 0;

    // assume we'll get EOF
    str = NULL;

    while (1) {
        ch = fgetc(fp);
        if (ch == EOF)
            break;

        // enable this if you want to absorb blank lines invisibly
#if 0
        if (ch == '\n')
            break;
#endif

        // grow the buffer
        if (len == current_max) {
            // this "grow length" can be any amount
            current_max += 10;

            tmp = str;
            str = realloc(tmp,current_max + 1);

            if (str == NULL) {
                free(tmp);
                break;
            }
        }

        // check for newline
        // NOTE: do this _after_ the realloc to differentiate a blank line
        // from EOF
#if 1
        if (ch == '\n')
            break;
#endif

        str[len++] = ch;
    }

    // trim string to exact length and add EOS to string
    // NOTE: the trim is optional -- without it, it just means the buffer may
    // be slightly larger than needed
    if (str != NULL) {
#if 1
        tmp = str;
        str = realloc(str,len + 1);
        if (str == NULL)
            free(tmp);
        else
            str[len] = 0;
#else
        str[len] = 0;
#endif
    }

    return str;
}
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
0

Recommend to test why the loop stopped.

Memory management omitted for brevity.

while((ch = fgetc(fp)) != '\n' && ch != EOF) {
  str[len++] = ch;
}

// If _nothing_ read, return NULL
if (len == 0 && ch == EOF) {
  return NULL;
}

str[len]= '\0';
return str;
chux - Reinstate Monica
  • 143,097
  • 13
  • 135
  • 256