0

The CSV files that need to be read are quite large, but each of them only has only one line (from kb to gb), one large line. I tried to use the code below, but the maximum size can be only reached by 1000000, adds one more 0 resulted Segmentation fault. I read that the buffer has a limitation, but is there another way just to read that one large line and put then in an int array? or do I really need to use mmap? Thanks a lot!

struct IntArrLen {
 int length;
 int max_index;
 int* arr;
};


struct IntArrLen readInt(char* str){
    struct IntArrLen intArr;
    intArr.length = 0;
    intArr.arr = NULL;

    char file_name[100];
    strcat(file_name, str);
    FILE *fp;
    fp = fopen(file_name, "r");

    if (!fp){
        return intArr;
    }

    char* token = NULL;
    // define the maximum size of line
    int max_size = 1000000;
    char row_content[max_size];
    int* arr = (int *) malloc(sizeof(int) * max_size);
    fscanf(fp, "%[^\n]", row_content);

    int count = 0;
    token = strtok(row_content, ",");
    while(token != NULL){
        arr[count] = atoi(token);
        token = strtok(NULL, ",");
        count ++;
    }

    intArr.length = count;
    intArr.arr = (int *) malloc(sizeof(int) * intArr.length);
    memcpy(intArr.arr, arr, sizeof(int) * intArr.length);

    free(arr);
    return intArr;
}


the CSV file is something like this:

1245,2234532,23243523,235235325,325362436,5473658658,65879659,....

David Ranieri
  • 39,972
  • 7
  • 52
  • 94

0 Answers0