2

I currently have a problem of reading data from a CSV file.

I think the code works almost fine. However, the printed output shows some weird characters as below (output 9-11).

Do you have any idea on what is happening here? I just want to get rid of these weird characters so that I can process the imported data accordingly.

Alternatively, if you have any feedback on my coding style, please share with me if you don't mind.

Output:

Obsns size is 150 and feat size is 4.
1. 5.100000, 3.500000, 1.400000, 0.200000, Iris-setosa
2. 4.900000, 3.000000, 1.400000, 0.200000, Iris-setosa
3. 4.700000, 3.200000, 1.300000, 0.200000, Iris-setosa
4. 4.600000, 3.100000, 1.500000, 0.200000, Iris-setosa
5. 5.000000, 3.600000, 1.400000, 0.200000, Iris-setosa
6. 5.400000, 3.900000, 1.700000, 0.400000, Iris-setosa
7. 4.600000, 3.400000, 1.400000, 0.300000, Iris-setosa
8. 5.000000, 3.400000, 1.500000, 0.200000, Iris-setosa
9. 4.400000, 2.900000, 1.400000, 0.200000, ��L>-setosa
10. 4.900000, 3.100000, 1.500000, 0.100000, Iris���=osa
11. 5.400000, 3.700000, 1.500000, 0.200000, Iris-set��L>
12. 4.800000, 3.400000, 1.600000, 0.200000, Iris-setosa
13. 4.800000, 3.000000, 1.400000, 0.100000, Iris-setosa
14. 4.300000, 3.000000, 1.100000, 0.100000, Iris-setosa

Code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int checkObsnsSize(char *dataFileName);
void readIris();

/* Program entry point: runs readIris() once and returns 0. */
int main() {
    readIris();
    return 0;
}

/*
 * Reads "iris.data" line by line, parses four comma-separated floats and a
 * trailing text field from each line, and prints every parsed row with a
 * 1-based row number.
 * Prints a message and exits with status 1 if the file cannot be opened.
 */
void readIris() {    
    int featSize = 4;
    char *dataFileName = "iris.data";
    /* First pass over the file: its result sizes the arrays below. */
    int obsnsSize = checkObsnsSize(dataFileName);
    /* Variable-length arrays; feat has featSize rows, i.e. valid row indices
       are 0 .. featSize-1. */
    float feat[featSize][obsnsSize];
    /* label is zeroed below but not otherwise used in this function. */
    int label[obsnsSize];
    memset(feat, 0, featSize * obsnsSize * sizeof(float));
    memset(label, 0, obsnsSize * sizeof(int));

    printf("Obsns size is %d and feat size is %d.\n", obsnsSize, featSize);

    FILE *fpDataFile = fopen(dataFileName, "r");   
    if (!fpDataFile) {
        printf("Missing input file: %s\n", dataFileName);
        exit(1);
    }

    int index = 0;
    /* NOTE(review): feof() only becomes true after a read has already hit
       end-of-file, so the body can run once more after the last line. */
    while (!feof(fpDataFile)) {
        char line[1024];
        char flowerType[20];

        /* NOTE(review): the return value of fgets() is not checked. */
        fgets(line, 1024, fpDataFile);

        /* NOTE(review): rows 1..4 are used here, but feat only has rows 0..3,
           so feat[4][index] lies outside the array. The %[^\n] conversion has
           no width limit for the 20-byte flowerType buffer, and the return
           value of sscanf() is not checked. */
        sscanf(line, "%f,%f,%f,%f,%[^\n]",
               &feat[1][index], &feat[2][index],
               &feat[3][index], &feat[4][index], flowerType);
        printf("%d. %f, %f, %f, %f, %s\n", ((int)index + 1),
               feat[1][index], feat[2][index],
               feat[3][index], feat[4][index], flowerType);
        index++;
    }
    fclose(fpDataFile);
}

/*
 * Opens dataFileName and returns the number of times the read loop below
 * runs; readIris uses this value as the number of observations.
 * Prints a message and exits with status 1 if the file cannot be opened.
 */
int checkObsnsSize(char *dataFileName) {
    int obsnsSize = 0;
    char line[1024];

    FILE *fpDataFile = fopen(dataFileName, "r");
    if (!fpDataFile) {
        printf("Missing input file: %s\n", dataFileName);
        exit(1);
    }
    /* NOTE(review): the counter is incremented even when fgets() fails, and
       feof() is only set once a read has hit end-of-file, so the result can
       be one more than the number of lines actually read. */
    while (!feof(fpDataFile)) {
        fgets(line, 1024, fpDataFile);
        obsnsSize++;
    }
    fclose(fpDataFile);
    return obsnsSize;
}
Brian Tompsett - 汤莱恩
  • 5,753
  • 72
  • 57
  • 129
Frank Puk
  • 163
  • 2
  • 7
  • Btw I tried different compilers on Linux and I still got the same error. So I don't think the above problem is related to the choice of compiler. – Frank Puk Jan 13 '16 at 17:37
  • 1
    read this post http://stackoverflow.com/q/5431941/5339899 – TheQAGuy Jan 13 '16 at 17:38
  • 1
    Please provide your CSV file. I ran your code with a CSV file created from the output and the program appeared to run as expected. – Chimera Jan 13 '16 at 17:49
  • Thank you @chqrlie for helping me re-format the code for further readability. – Frank Puk Jan 13 '16 at 17:57

3 Answers

1
sscanf(line, "%f,%f,%f,%f,%[^\n]", &feat[1][index], &feat[2][index],
         &feat[3][index], &feat[4][index], flowerType);
printf("%d. %f, %f, %f, %f, %s\n", ((int) index+1), feat[1][index], feat[2][index],
         feat[3][index], feat[4][index], flowerType);

In these two lines you access an out-of-bounds index: &feat[4][index]. This causes undefined behaviour.

As declaration of array is

 float feat[featSize][obsnsSize];     //where featSize is 4 

So you can access indices 0 to 3, not 4 (array indexing starts from 0).

ameyCU
  • 16,489
  • 2
  • 26
  • 41
  • You are so right. The problem is solved after I reduce the value you specified by 1. Thank you ameyCU! – Frank Puk Jan 13 '16 at 17:48
  • @FrankPuk Yes, that's because you were writing to an invalid memory location. If you declare an array of size `n`, you can access indices from `0` to `n-1`. – ameyCU Jan 13 '16 at 17:49
1

A couple of things:

  • Don't check on feof, check the return value of fgets() instead.

    while (!feof(fpDataFile)) {
    
  • Always check the return value of sscanf().

  • Your index should start at 0 instead of 1 (index 4 is out of bounds):

    sscanf(line, "%f,%f,%f,%f,%[^\n]",
           &feat[0][index], &feat[1][index],
           &feat[2][index], &feat[3][index], flowerType);
    printf("%d. %f, %f, %f, %f, %s\n", ((int)index + 1),
           feat[0][index], feat[1][index],
           feat[2][index], feat[3][index], flowerType);
    
  • As @chqrlie stated: use %19[^\n] to avoid overflow as flowerType size is only 20:

    sscanf(line, "%f,%f,%f,%f,%19[^\n]",
           &feat[0][index], &feat[1][index],
           &feat[2][index], &feat[3][index], flowerType);
    
Danny_ds
  • 11,201
  • 1
  • 24
  • 46
1

So to put it all together, the correct code looks like this:

1- Use fgets() return value to determine when the file is completely read.
2- Read into arrays starting at index 0
3- Check return value of sscanf().

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int checkObsnsSize(char * dataFileName);
void readIris ();

/* Entry point: run the iris reader once, then report success to the host. */
int main(void)
{
    readIris();
    return EXIT_SUCCESS;
}

/*
 * Counts the lines of the text file `dataFileName`.
 *
 * Every successful fgets() call counts as one line, so a final line without a
 * trailing newline is still counted and an empty file yields 0. A line longer
 * than the 1023-character buffer is counted once per buffer-sized piece;
 * readIris() reads with the same buffer size, so both passes agree.
 *
 * Prints a message and exits with status 1 if the file cannot be opened.
 */
int checkObsnsSize(char * dataFileName) {

    int obsnsSize = 0;
    char line[1024];

    FILE *fpDataFile = fopen(dataFileName, "r");
    if (!fpDataFile) {
        printf("Missing input file: %s\n", dataFileName);
        exit(1);
    }
    /* Loop on fgets() itself: feof() only turns true after a read has already
       failed, so testing it first would count one line too many. */
    while (fgets(line, sizeof line, fpDataFile)) {
        obsnsSize++;
    }
    fclose(fpDataFile);
    return obsnsSize;
}

/*
 * Reads "iris.data", parses four comma-separated floats and a text field from
 * each line, and prints every successfully parsed row with a 1-based number.
 * Lines that do not yield all five fields (e.g. a blank trailing line) are
 * skipped.
 *
 * Prints a message and exits with status 1 if the file cannot be opened.
 */
void readIris() {

    int featSize = 4;

    char *dataFileName = "iris.data";
    int obsnsSize = checkObsnsSize(dataFileName);

    printf("Obsns size is %d and feat size is %d.\n", obsnsSize, featSize);

    /* A variable-length array must have a size greater than zero, so stop
       before declaring the arrays when the file holds no lines. */
    if (obsnsSize <= 0) {
        return;
    }

    float feat[featSize][obsnsSize];
    /* label is zeroed but not otherwise used in this function. */
    int label[obsnsSize];
    memset(feat, 0, sizeof feat);
    memset(label, 0, sizeof label);

    FILE *fpDataFile = fopen(dataFileName, "r");

    if (!fpDataFile) {
        printf("Missing input file: %s\n", dataFileName);
        exit(1);
    }

    int index = 0;
    char line[1024];
    char flowerType[20];

    /* The index bound keeps writes inside feat even if the file grew between
       the counting pass and this pass. */
    while (index < obsnsSize && fgets(line, sizeof line, fpDataFile)) {
        /* %19[^\n] leaves room for the terminating NUL in flowerType[20]. */
        if (5 == sscanf(line, "%f,%f,%f,%f,%19[^\n]",
                        &feat[0][index], &feat[1][index],
                        &feat[2][index], &feat[3][index], flowerType)) {
            printf("%d. %f, %f, %f, %f, %s\n", index + 1,
                   feat[0][index], feat[1][index],
                   feat[2][index], feat[3][index], flowerType);
            index++;
        }
    }
    fclose(fpDataFile);
}
Chimera
  • 5,884
  • 7
  • 49
  • 81