0

I have quite a big dictionary file. I want to take each line from the file and store it in an array so I can perform manipulations later. For example given the words: aaaa arggghhh broooooo Coooodee

If I call array[2], it should give me "broooooo". I have tried using the code below however I keep running into segmentation faults. Any help would be greatly appreciated.

Here is the code I have been trying:

int main (int argc, char* argv[]) {
    char* file="/usr/share/dict/words";

    FILE *dict;
    char str[60];
    char arr[80368][60];
    int count = 0;
    dict = fopen(file, "r");
    if(dict == NULL){
            perror("Error opening file");
            return(-1);
    }
    while(fgets(str,sizeof(str),dict) != NULL){
            strcpy(arr[count], str);
            count++;
    }

    fclose(dict);
    
    return 0;

}

Josh K
  • 1
  • 2
    Those arrays are more than 4mb on the stack. Depending on your platform and compile flags that may be too large. You could try moving them outside main and see if that makes a difference. – Retired Ninja Aug 25 '21 at 07:15
  • 2
    On many systems `char arr[80368][60];` is just too big as a variable with automatic storage duration. Try `static char arr[80368][60];` – Support Ukraine Aug 25 '21 at 07:18
  • OT: `fgets` will not give you **one** word but one line... – Support Ukraine Aug 25 '21 at 07:23
  • Try making the array much smaller, so you can confirm that it is a variable size issue as suggested by the comments above. Also, make sure your loop stops when the maximum number of words (as given by the array size) have been read. – Erich Kitzmueller Aug 25 '21 at 07:27

2 Answers2

0

char arr[80368][60];

This will try to allocate 4822080 bytes on the stack. The default maximum should be 8Mb so it is lower than that, but maybe it is configured lower on your system? You can inspect with ulimit -a | grep stack.

Does your program work if you test with a smaller input file, say generated with head -100 /usr/share/dict/words > input.txt, and then with the size of arr reduced correspondingly?

hlovdal
  • 26,565
  • 10
  • 94
  • 165
0

This is the best method to read file when you don't know the size.

If you want the file inside array you just need to use str_split else the file is inside char *

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <fcntl.h>
#include <stddef.h>

char **str_split(char *cmd, char split_by)
{
    char **argv = malloc(sizeof(char *) * strlen(cmd));
    int pos = 0;

    for (int i = 0; cmd[i] != '\0'; i++) {
        if (cmd[i] == split_by || cmd[i] == '\0') {
            cmd[i] = '\0';
            argv[pos] = strdup(cmd);
            pos++;
            cmd += i + 1;
            i = 0;
        }
    }
    argv[pos] = strdup(cmd);
    argv[pos + 1] = NULL;
    return argv;
}

char *load_file(char *file_path)
{
    int fd;
    struct stat file_stat;
    stat(file_path, &file_stat);
    int file_size = file_stat.st_size;
    char *str = malloc(sizeof(char) * file_size + 2);

    if ((fd = open(file_path, O_RDONLY)) == -1) {
        printf("error");
        exit(84);
    }
    read(fd, str, file_size);
    str[file_size] = '\0';

    return str;
}


int main(int ac, char **av)
{
    char *file_in_string = load_file(av[1]);
    printf("this is the file in one string:\n%s\n", file_in_string);
    char **file_in_array = str_split(file_in_string, '\n');
    printf("this is the file inside array");
    for (int i = 0; file_in_array[i]; i++)
        printf("line [%d]: %s\n", i, file_in_array[i]);

}
marc_s
  • 732,580
  • 175
  • 1,330
  • 1,459
gabriel
  • 42
  • 9