1

I have a CSV file with 7 fields:

me,val1,val2,val3,val4,val5,val6,
me,val1,val2,val3,val4,val5,val6,

I am parsing this using the following function:

void readcsv() {
    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    FILE *stream = fopen("input.csv", "r");
    if (stream != NULL) {
        char line[1024];
        while (fgets(line, 1024, stream)) {
            char *tmp = strdup(line);      
            char a1[20] = "";
            char b1[20] = "";
            char c1[20] = "";
            char d1[20] = "";
            char e1[20] = "";
            char f1[20] = ""; 
            char g1[20] = ""; 

            strcat(a1, getcsvfield(tmp, 1));
            strcat(b1, getcsvfield(tmp, 2));
            strcat(c1, getcsvfield(tmp, 3));
            strcat(d1, getcsvfield(tmp, 4));
            strcat(e1, getcsvfield(tmp, 5));
            strcat(f1, getcsvfield(tmp, 6));
            strcat(g1, getcsvfield(tmp, 7));

            //printf("Field 1 would be %s\n", a1);
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 2));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 3));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 4));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 5));
            //printf("Field 2 would be %s\n", getcsvfield(tmp, 6));
            execute("INSERT INTO sdata  (uid,sid,name,area,type,stbamount,pkgamount)"
                    "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
                    a1, b1, c1, d1, e1, f1, g1);
            // NOTE strtok clobbers tmp
            free(tmp);
        }
        lk_dispclr();
        lk_disptext(2, 4, "CSV Imported!", 1);
        lk_getkey();
    } else {
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
    }
}

//Used for parsing CSV
const char *getcsvfield(char *line, int num) {
    char buffer[1024] = { 0 };
    strcpy(buffer, line);
    const char *tok;
    for (tok = strtok(buffer, ",");
         tok && *tok;
         tok = strtok(NULL, ",\n"))
    {
        if (!--num)
            return tok;
    }
    return NULL;
}

But if the 6th field (val5) is missing val6 is getting inserted in the Table at the position for val5, where it actually should be blank.

What am I doing wrong?

chqrlie
  • 131,814
  • 10
  • 121
  • 189
techno
  • 6,100
  • 16
  • 86
  • 192
  • In http://stackoverflow.com/questions/32349263/c-regex-how-to-match-any-string-ending-with-or-any-empty-string/32351114#32351114 I provide a basic CSV parser that can serve your needs. – Paul Ogilvie Feb 27 '16 at 14:45

2 Answers2

1

Your code has several problems

  • the main issue is you return a pointer to automatic storage in getcsvfield: you copy the line into the local array buffer and use strtok to parse it. When you return the n-th element, tok points inside buffer which is a local array. referencing this array after you return from the function getcsvfield invokes undefined behavior. You can fix this problem by copying the field into a buffer received as an argument to getcsvfield.

  • regarding empty value, you cannot use strtok to parse the CSV format: it first skips all occurrences of the delimiter characters, thus you cannot have empty fields as a sequence of , will be interpreted as a single separator. strtok is an obsolete function that uses a hidden global state, you probably should avoid using it in other places too.

Here is an improved version:

#include <stdio.h>
#include <string.h>

//Used for parsing CSV
char *getcsvfield(char *dest, int size, const char *line, int num) {
    const char *p;

    for (p = line; *p != '\0' && *p != '\n';) {
        int len = strcspn(p, ",\n");  /* parse field characters */
        if (--num <= 0) {
            if (len >= size)
                len = size - 1;
            memcpy(dest, p, len);
            dest[len] = '\0';
            return dest;
        }
        p += len;
        if (*p == ',')
            p++;
    }
    *dest = '\0';
    return NULL;
}

void readcsv(void) {
    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    FILE *stream = fopen("input.csv", "r");
    if (stream != NULL) {
        char line[1024];
        while (fgets(line, 1024, stream)) {
            char a1[20], b1[20], c1[20], d1[20], e1[20], f1[20], g1[20]; 

            getcsvfield(a1, sizeof a1, line, 1);
            getcsvfield(b1, sizeof b1, line, 2);
            getcsvfield(c1, sizeof c1, line, 3);
            getcsvfield(d1, sizeof d1, line, 4);
            getcsvfield(e1, sizeof e1, line, 5);
            getcsvfield(f1, sizeof f1, line, 6);
            getcsvfield(g1, sizeof g1, line, 7);

            execute("INSERT INTO sdata  (uid,sid,name,area,type,stbamount,pkgamount)"
                    "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
                    a1, b1, c1, d1, e1, f1, g1);
        }
        fclose(stream);
        lk_dispclr();
        lk_disptext(2, 4, "CSV Imported!", 1);
        lk_getkey();
    } else {
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
    }
}

Note that your insertion method might allow an attacker to perform SQL injection via the CSV file. In the above example, it would be difficult because of the 20 byte limitation per field, but in other places you should be more careful when composing SQL commands. It is also possible that SQlite perform sanity checking on the execute arguments.

chqrlie
  • 131,814
  • 10
  • 121
  • 189
  • Thanks a lot for the answer.Sorry for the late reply.. i was very busy with other things.I understand its an issue with pointers and the strtok call.Regarding the SQL Injection.. i understand that.. but this is for a embedded device that will be carried by authorized personal.I currently dont have access to the device .. i will try and get back tomorrow :) – techno Feb 28 '16 at 16:00
  • The empty values are still being replaced by the next entry in the CSV. – techno Feb 29 '16 at 04:58
  • @techno: I updated the answer to fix a couple of typos. I tested the parser with empty values (sequences of `,`): it works as expected on my test file. Can you provide an sample for which it fails? – chqrlie Feb 29 '16 at 11:02
  • Thanks .. i fixed your old code by adding `if (*p == ' ') p++;` where did you implement the logic for empty field checking? – techno Mar 01 '16 at 07:10
  • @techno: I just use `len = strcspn(p, ",\n");` to compute the number of characters in the current field, which will be 0 for an empty field. `p += len;` skips the field contents, if `*p` is a `,`, I skip the comma and `p` points to the next field. Where did you add the `if (*p == ' ') p++;` logic? – chqrlie Mar 01 '16 at 10:06
  • I add it like this `p += len; if (*p == ',') p++; if (*p == ' ') p++;` – techno Mar 01 '16 at 12:36
  • I hope you have seen the other answer that involved reading the file to an array.. i had pointed out that this can lead to memory overflow and lead to crash..as im doing this for a device with limited memory.The answer is deleted now.I just tried the old version of your code with my edit.. on the device,it works flawlessly now :) ..so i will keep it.Hope your implementation saves much memory that the other answer. – techno Mar 01 '16 at 12:40
1

If you can work with an array fields[ITEMS][LENGTH] instead of individual variables a1[LENGTH], b1[LENGTH], ..., the array could be passed to the function and filled with one call.

#include <stdio.h>
#include <string.h>
#include <stdlib.h>

#define ITEMS 7
#define LENGTH 20

void getcsvfields ( char *psource, char *pdelim, char (*fields)[LENGTH], int index);

int main()
{
    char csv1[] = "me;val1;;val3;val4;;val6;";
    char csv2[] = ";val1;;val3;";
    char fields[ITEMS][LENGTH] = {{'\0'}};//array to store values
    int each = 0;

    //pass the line, delimiter(s), array, number of array elements
    getcsvfields ( csv1, ";", fields, ITEMS);
    //print each extracted string
    for ( each = 0; each < ITEMS; each++) {
        printf ( "fields[%d] %s\n", each, fields[each]);
    }

    //pass the line, delimiter(s), array, number of array elements
    getcsvfields ( csv2, ";", fields, ITEMS);
    //print each extracted string
    for ( each = 0; each < ITEMS; each++) {
        printf ( "fields[%d] %s\n", each, fields[each]);
    }

    return 0;
}

void getcsvfields ( char *psource, char *pdelim, char (*fields)[LENGTH], int index) {
    char *pnt;
    char *cur;
    int span = 0;
    int item = 0;

    if ( psource && pdelim) {//check for null pointers
        cur = psource;
        pnt = psource;
        while ( pnt) {
            pnt = strpbrk ( cur, pdelim);
            if ( pnt) {
                fields[item][0] = '\0';
                if ( pnt != cur) {
                    span = pnt - cur;
                    if ( span < LENGTH - 1) {
                        memcpy ( &fields[item][0], cur, span);
                        fields[item][span] = '\0';
                    }
                }

                item++;
                if ( item >= index) {
                    return;
                }
                cur = pnt + 1;
            }
        }
        while ( item < index) {
            fields[item][0] = '\0';
            item++;
        }
    }
}

Using the function in your program would look something like:

void readcsv() {
    lk_dispclr();
    lk_disptext(2, 0, "Parsing CSV..", 0);
    lk_disptext(3, 0, "Please Wait..", 0);

    FILE *stream = fopen("input.csv", "r");
    if (stream != NULL) {
        char line[1024];

        //declare the array
        char fields[7][20] = {{'\0'}};

        while (fgets(line, 1024, stream)) {

            // call the function here to get the line into the array
            getcsvfields ( line, ";", fields, 7);

            execute("INSERT INTO sdata (uid,sid,name,area,type,stbamount,pkgamount)"
            "   VALUES('%s','%s','%s','%s','%s','%s','%s');",
            fields[0], fields[1], fields[2], fields[3], fields[4], fields[5], fields[6]);
        }
        lk_dispclr();
        lk_disptext(2, 4, "CSV Imported!", 1);
        lk_getkey();
    } else {
        lk_dispclr();
        lk_disptext(2, 4, "CSV Not Found!", 1);
        lk_getkey();
    }
}
user3121023
  • 8,181
  • 5
  • 18
  • 16