5

I'm reading a file and want to put each line into a string in an array. The length of the file is arbitrary and the length of each line is arbitrary (albeit assume it will be less than 100 characters).

Here's what I've got and it's not compiling. Essentially this is an array to an array of characters, right? So shouldn't it be char** words = (**char)malloc(sizeof(*char));?

#include <stdio.h>
#include <stdlib.h>

int main(){


 int BUFSIZE = 32767;//max number of lines to read
 char** words = (**char)malloc(sizeof(*char));//gives error: expected expression before 'char'
 FILE *fp = fopen("coll.txt", "r");
 if (fp == 0){
        fprintf(stderr, "Error opening file");
        exit(1);
 }

int i = 0;
words[i] = malloc(BUFSIZE);
while(fscanf(fp, "%100s", words[i]) == 1)//no line will be longer than 100
{
        i++;
        words[i] = realloc(words, sizeof(char*)*i);
 }

 int j;
 for(j = 0; j < i; j++)
    printf("%s\n", words);

 return 0;
}

Note: I've read "Reading from a file and storing in array" but it doesn't answer my question.

Jason
  • 542
  • 5
  • 24
Celeritas
  • 14,489
  • 36
  • 113
  • 194
  • it should be char*... you are trying to find size of character pointer... – AurA Oct 04 '13 at 04:53
  • 1
    Note that `%100s` will (a) skip leading white space and stop reading at the white space after a non-white space character, and (b) will overflow a buffer of size 100 by one byte, which might matter. You have to specify one less than the size of the array in the conversion specification. – Jonathan Leffler Oct 04 '13 at 07:08

2 Answers2

15

There are a few issues with your program. The realloc() statement is not used correctly. I also prefer fgets() for getting a line. Here is my solution. This also uses realloc() to increase the allocation of the buffer lines so that you neither have to know the number of lines in advance nor do you have to read the file in two passes (faster that way). This is a common technique to use when you don't know how much memory you'll have to allocate in advance.

#include <stdio.h>
#include <stdlib.h>

int main(void)

    {
    int lines_allocated = 128;
    int max_line_len = 100;

    /* Allocate lines of text */
    char **words = (char **)malloc(sizeof(char*)*lines_allocated);
    if (words==NULL)
        {
        fprintf(stderr,"Out of memory (1).\n");
        exit(1);
        }

    FILE *fp = fopen("coll.txt", "r");
    if (fp == NULL)
        {
        fprintf(stderr,"Error opening file.\n");
        exit(2);
        }

    int i;
    for (i=0;1;i++)
        {
        int j;

        /* Have we gone over our line allocation? */
        if (i >= lines_allocated)
            {
            int new_size;

            /* Double our allocation and re-allocate */
            new_size = lines_allocated*2;
            words = (char **)realloc(words,sizeof(char*)*new_size);
            if (words==NULL)
                {
                fprintf(stderr,"Out of memory.\n");
                exit(3);
                }
            lines_allocated = new_size;
            }
        /* Allocate space for the next line */
        words[i] = malloc(max_line_len);
        if (words[i]==NULL)
            {
            fprintf(stderr,"Out of memory (3).\n");
            exit(4);
            }
        if (fgets(words[i],max_line_len-1,fp)==NULL)
            break;

        /* Get rid of CR or LF at end of line */
        for (j=strlen(words[i])-1;j>=0 && (words[i][j]=='\n' || words[i][j]=='\r');j--)
            ;
        words[i][j+1]='\0';
        }
    /* Close file */
    fclose(fp);

    int j;
    for(j = 0; j < i; j++)
        printf("%s\n", words[j]);

    /* Good practice to free memory */
    for (;i>=0;i--)
        free(words[i]);
    free(words);
    return 0;
    }
willus
  • 501
  • 3
  • 7
  • 1
    +1. I strongly recommend putting the semicolon which is the body of the `for` loop on a line on its own. When appended to the same line after the close parenthesis, it is too easily mistaken for a typo, or overlooked. Nominally, you could have several `'\r'` characters on a single line; it might matter (but more likely won't). – Jonathan Leffler Oct 04 '13 at 07:11
  • @JonathanLeffler -- I implemented your suggestions. Thank you. – willus Oct 04 '13 at 12:38
  • 1
    @willus how did you decide on the value 128 for `int lines_allocated`? – Celeritas Oct 05 '13 at 00:35
  • @Celeritas -- Good question. Somewhat randomly. You want to pick a number that is not so large that you are over-allocating memory initially, but not so small as to cause a lot of realloc calls. You're only allocating an array of pointers, so you could start with a larger value. The factor of 2 multiplier could also be adjusted. I've seen 1.5 used. It partly depends on what you think your typical case will be as well. If you really wanted, you could do a study on memory use and run time for different cases to optimize the values, but the variation in performance would likely be small. – willus Oct 05 '13 at 10:36
  • 1
    Nice answer! The for loop to remove `\n` and/or `\r` from the end of line removes one character to many, putting the `\0` at the last character of the string that is not a newline character (thus truncating that last character). You should probably change `words[i][j]` to `words[i][j+1]`. Also, it's a good habit to close the open file with `fclose()`. – Stefan van den Akker Jan 12 '15 at 13:50
  • @Neftas -- good catches. Thank you. I have edited the source code per your suggestions. – willus Mar 07 '15 at 15:16
1

You should change the line:

char** words = (**char)malloc(sizeof(*char));

into this:

char** words=(char **)malloc(sizeof(char *)*Max_Lines);
Cobain
  • 186
  • 1
  • 8