1

Been at this thing for awhile. I'm writing a C program that parses a csv file into a nested struct. When I pass the ** to the struct into the add_field function, I am able to get all of the pointers until I get to the field[(* f) - 1] pointer of the field array. That always returns NULL and I cannot figure out why. Any help would be greatly appreciated. Thank you.

Sorry for the huge code dump but I didn't want to leave anything out. I've been checking for null after every alloc but haven't incorporated them yet in this rewrite (I’m watching variable values with step in on clion).

#include <stdlib.h>
#include <stdio.h>
#include <errno.h>
#include <string.h>

/* Inline function for handling errors during memory allocation */
extern int errno;
#ifndef error
#define ERROR static inline void *
__attribute((always_inline)) ERROR error() {
    printf("%d\n", errno);
    printf("%s", strerror(errno));
    exit(EXIT_FAILURE);
}
#endif
/* ------------------------------------------------------------ */

#define MAX_BUFFER_SIZE 100


typedef struct data_field {

        char * data;
        size_t fldsize;

    } FIELD;

typedef struct data_set {

        int field_count;
        size_t setsize;
        FIELD ** field;

    } SET;

typedef struct csv_file {

        char * filename;
        int set_count;
        size_t filesize;
        SET ** set;

    } CSV;

CSV * alloc_csv();
void add_set(CSV ** fp, const int * setcnt);
void add_field(SET **sp, char *buffer, const int *f);
char * file_read(char * file_name);



int main()
{
    int b = 0;
    ulong bufcnt = 0;
    int fldcnt = 0;
    int setcnt = 0;
    int bmax = 100;

    char tok1 = '\n';
    char tok2 =  ',';
    char * buffer;
    char * stream;

    stream = file_read("/home/jonathon/Documents/programming/personal/csv/data_files/MOCK_DATA.csv");

    CSV * file = {0};
    void * filetmp = malloc(sizeof(CSV));
    file = filetmp;
    file->set_count = 0;

    void * arrtmp = calloc(1, sizeof(SET *));
    file->set = (SET **)arrtmp;

    void * settmp = calloc(1, sizeof(SET));
    file->set[0] = (SET *)settmp;

    setcnt++;

    void * buftmp = malloc(sizeof(char) * MAX_BUFFER_SIZE);
    buffer = buftmp;

    // read stream until end of field
    buftmp = malloc(sizeof(char) * MAX_BUFFER_SIZE);
    buffer = buftmp;
    for (int c = 0; stream[c] != '\0'; c++)
    {
        if (b >= bmax)
        {
            buftmp = realloc(buffer, sizeof(char) * (MAX_BUFFER_SIZE + bmax));
            buffer = buftmp;
        }

        switch (stream[c])
        {
            case 10:
                buffer[b] == '\0';
                b = 0;
                
                break;
            case 44:
                buffer[b] == '\0';
                add_field(&file->set[setcnt - 1], buffer, &fldcnt);
                fldcnt++;

                b = 0;
                break;
            default:
                buffer[b] = stream[c];
                b++;
                
        }
    }
}

void add_field(SET ** sp, char * buffer, const int * f)
{
    ulong buflen = strlen(buffer + 1);

    if ((*f) == 0)
    {
        (*sp)->field = (FIELD **)calloc(1, sizeof(FIELD *));
    }
    else
    {
        (*sp)->field = (FIELD **)realloc((*sp)->field, sizeof(FIELD *) * ((*f) + 1));
    }

    (*sp)->field[(*f) - 1] = (FIELD *)calloc(1, sizeof(FIELD));

    (*sp)->field[(*f) - 1]->data = (char *)calloc(buflen, sizeof(char));
    memcpy((*sp)->field[(*f) - 1]->data, buffer, buflen * sizeof(char));
}

void free_csv(CSV ** fp, const int * setcnt, const int * fldcnt)
{
    for (int i = 0; i < * setcnt; i++)
    {
        for (int j = 0; j < * fldcnt; j++)
        {
            free((* fp)->set[i]->field[j]->data);
            free((* fp)->set[i]->field[j]);
        }
        free((* fp)->set[i]->field);
        free((* fp)->set[i]);
    }
    free((* fp)->set);
    free(* fp);
}

char *file_read(char* file_name)
{
    FILE *fp = fopen(file_name, "rb");
    size_t file_size;

    char* file_buffer = NULL;

    if (!fp)
    {
        perror("Error: ");
        exit(EXIT_FAILURE);
    }
    else
    {
//              Seek to end of file to get file file_size
        fseek(fp, 0, SEEK_END);
        file_size = ftell(fp);
        file_buffer = (char *)calloc(file_size, sizeof(char));      // Allocate buffer file_size
        if (!file_buffer)
        {
            perror("Error: ");
            exit(EXIT_FAILURE);
        }

//              Seek to beginning of file to read from start.
        fseek(fp, 0, SEEK_SET);
        if (fread(file_buffer, file_size, 1, fp) != 1)        // Read into buffer
        {
            perror("Error: ");
            exit(EXIT_FAILURE);
        }

        fclose(fp);
    }

    return file_buffer;
}```
  • Run your code through valgrind. If you're mismanaging memory it will tell you where. Also, be sure to compile with all warnings enabled. – dbush May 19 '22 at 03:24
  • 1
    `buflen = strlen(buffer + 1);` Memory allocated with length `buflen` can only hold strings that are 2 characters shorter than what is in `buffer`. Is that intented? Normally the size to allocating memory for a string is calculated like this: `strlen(buffer)+1`. Also, if `buffer` is empty and not fully initialized to `0` you might walk through all your memory if you start at second byte. – Gerhardh May 19 '22 at 04:56
  • Unrelated. In C you should not cast the return value of `calloc` and friends. It makes code less readable and might hide errors. – Gerhardh May 19 '22 at 04:57
  • A single `=` is for assignment: `buffer[b] == '\0';` is a comparison statement with no effect (unused result). This will cause `add_field` to invoke [Undefined Behaviour](https://en.cppreference.com/w/c/language/behavior) when accessing the indeterminate values of `buffer` via `strlen`. What is `ulong`? Please post a complete [Minimal, Reproducible Example](https://stackoverflow.com/help/minimal-reproducible-example), and provide sample input (or structure it within the MRE), and the expected output. – Oka May 19 '22 at 05:33
  • 1
    Aside: `case 10:` and `case 44:` could be better understood as `case '\n':` and `case ',':`. `tok1` and `tok2` are unused. `sizeof (char)` is guaranteed to be `1`, and can be omitted from calculations. As previously mentioned, [do not cast the return of `malloc` in C](https://stackoverflow.com/a/605858/2505965), as any `void *` can be implicitly converted to any other pointer type. It appears all the `XXXtmp` variables are superfluous. You could slim this code down a bit with better practices; less to read is easier to debug. – Oka May 19 '22 at 05:42
  • `fread` works with raw bytes, not strings. If `fread` succeeds, `file_buffer` will be full. There is no guarantee `fread` will read a null-terminating byte from the file, and you treat the buffer as a string afterwards. Allocate an extra byte with `calloc`. That said: `ftell` is [not a reliable way](https://stackoverflow.com/a/49122325/2505965) of determining the "length" of a file. – Oka May 19 '22 at 05:50
  • Thank you everyone for going through my code. I’m at work but I’ll try to implement everyone’s suggestions on my breaks. Next time I’ll be sure to post a minimal, reproducible example. I’m fairly new at this so all of the advice is greatly appreciated! – Jonathon Propp May 19 '22 at 12:18

0 Answers0