0

I'm new to C programming and am having trouble figuring out how to scan a file and store each piece of data in its own variable. I have considered reading the file with fscanf, fgets, or fread, but I don't know how to do it correctly. This is the code I have written so far:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct testfile
    {
        char name[20];
        int number;
        int height;
        float weight;
    }file[100];

/*
 * Scratch version of the program: all three experiments (fscanf loop,
 * fgets/sscanf loop, interactive lookup) are commented out, so what actually
 * runs is only "open File.txt, close it, return 0".
 *
 * buffer, choose and x are referenced only by the commented-out code;
 * y and z are not used anywhere.
 */
int main ()
{
    char buffer[100];
    char choose='T';
    int x,y,z;
    FILE *fp;
    /* NOTE: the result of fopen is never checked. If File.txt cannot be
       opened, fp is NULL and the fclose(fp) at the end is undefined behavior. */
    fp=fopen("File.txt","r");

    /* Fscanf, but fail
    for(x=0;x<100;x++)
    {
        fscanf(fp,"%d %[^\n] %d %f",&file[x].number,file[x].name,&file[x].height,&file[x].weight);
        printf("%d %s %d %f\n",file[x].number,file[x].name,file[x].height,file[x].weight);
    }*/

    /* fgets, same fail too
    while(!feof(fp))
    {
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%d",&file[x].number);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%[^\n]",file[x].name);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%d",&file[x].height);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%f",&file[x].weight);
        printf("%d %s %d %f\n",file[x].number,file[x].name,file[x].name,file[x].height,file[x].weight);
        x++;
    }
    */
    
    /*After the file is success scanned and save into each variable, the user can choose what number from the data and retrieve the following data on that number
    while (choose !='Y')
    {
    printf("Choose Data: ");
    scanf("%d",&file[x].number);
    printf("Data number : %d\nName : %s\nHeight : %d\nWeight : %.2f\n",file[x].number,file[x].name,file[x].name,file[x].height,file[x].weight);
    x++;
    printf("Get Another Data? (Y/N) : ");
    scanf("%c",&choose);
    }*/
    fclose(fp);
    return 0;
}

File.txt content:

No  Name                        Height (CM)     Weight (KG)             
1   Josh Broclin                    175             83 
2   Ryan Andreas                    184             98
3   Tom Norton                      162             111.6
4   Harry Syd                       190             68
5   Hayu Beurang                    181             75
6   Jeff Rick                       169             108
7   Asley Thomas                    179             104

This is the attempt using fscanf (not mixed with fgets)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct testfile
    {
        char name[20];
        int number;
        int height;
        float weight;
    }file[100];

/*
 * fscanf-only attempt: tries to read ten records straight from File.txt and
 * echo each one.
 *
 * Why it prints only zeros:
 *  - The first line of File.txt is the column header ("No  Name ..."), so the
 *    leading %d hits the letter 'N' and fscanf stops with a matching failure.
 *    The offending character stays in the stream, so each of the ten
 *    iterations fails at exactly the same place and nothing is ever stored.
 *  - The return value of fscanf is ignored, so the failure goes unnoticed and
 *    the still-zero records are printed.
 *  - Even on a data line, %[^\n] would read everything up to the end of the
 *    line (height and weight included) and has no width limit for name[20].
 *
 * buffer, choose, y and z are unused here.
 */
int main ()
{
    char buffer[100];
    char choose='T';
    int x,y,z;
    FILE *fp;
    /* NOTE: fp is not checked for NULL before it is used. */
    fp=fopen("File.txt","r");

     //Fscanf, but fail
    for(x=0;x<10;x++)
    {
        fscanf(fp,"%d %[^\n] %d %f",&file[x].number,file[x].name,&file[x].height,&file[x].weight);
        /* NOTE: five arguments for four conversions. file[x].name is passed
           twice, so the second %d receives a char * and %f receives the int
           height; both mismatches are undefined behavior. */
        printf("%d %s %d %f\n",file[x].number,file[x].name,file[x].name,file[x].height,file[x].weight);
    }

    fclose(fp);
    return 0;

}

And this is the output:

0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000
0  0 0.000000

Process returned 0 (0x0)   execution time : 0.020 s
Press any key to continue.

This one is using fgets (not mixed with fscanf)

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
struct testfile
    {
        char name[20];
        int number;
        int height;
        float weight;
    }file[100];

/*
 * fgets/sscanf attempt: reads File.txt line by line and tries to fill one
 * record per loop iteration.
 *
 * Problems visible in this version:
 *  - x is used as the array index but is never given an initial value.
 *  - fgets is called four times per iteration, so every "record" is assembled
 *    from four different input lines rather than from the four columns of a
 *    single line.
 *  - while(!feof(fp)) tests for end-of-file before reading, and the results of
 *    fgets and sscanf are never checked, so stale buffer contents can be
 *    parsed again after a read fails.
 *  - &file[x].name has type char (*)[20]; %[ expects a char * (file[x].name),
 *    and the conversion has no width limit for the 20-byte array.
 *
 * choose, y and z are unused here.
 */
int main ()
{
    char buffer[100];
    char choose='T';
    int x,y,z;
    FILE *fp;
    /* NOTE: fp is not checked for NULL before it is used. */
    fp=fopen("File.txt","r");

    while(!feof(fp))
    {
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%d",&file[x].number);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%[^\n]",&file[x].name);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%d",&file[x].height);
        fgets(buffer,sizeof buffer,fp);
        sscanf(buffer,"%f",&file[x].weight);
        /* NOTE: five arguments for four conversions. file[x].name is passed
           twice, so the second %d receives a char * and %f receives the int
           height; both mismatches are undefined behavior. */
        printf("%d %s %d %f\n",file[x].number,file[x].name,file[x].name,file[x].height,file[x].weight);
        x++;
    }
    fclose(fp);
    return 0;

}

The output for fgets:

538976309 1     Josh Broclin                            175   ☻ 4225408 0.000000
153100320 5     Hayu Beurang                    ♠ 4225440 0.000000

Process returned 0 (0x0)   execution time : 0.105 s
Press any key to continue.

Both methods that I am currently using give the wrong output. After the file is scanned, the program should retrieve a record based on the number the user chooses, for example:

Choose Data: 3
Data number :3
Name : Tom Norton
Height : 162
Weight : 111.6
Get Another Data? (Y/N) : 

So how to solve this? I've been thinking about this problem and its solution, but can't find the solution. It really disturbs me whenever I want to sleep this problem comes again and figure out how to solve it. Thank you.

LearnerC
  • 51
  • 6
  • 2
    Unless you have delimiters for the name, then the only way to detect if the person has one, two, or more names will be to look for the numeric data 'height'. I suggest you look at using CSV text files. Whatever you do, don't mix the different file reading functions, they don't easily work next to each other. I would use `fgets` and process the string, you try but call it multiple times for the same line. Also please see [Why is `while ( !feof (file) )` always wrong?](http://stackoverflow.com/questions/5431941/why-is-while-feof-file-always-wrong) – Weather Vane Jun 21 '22 at 13:39
  • Please [edit] your question and add details about how exactly your attempts to use `fscanf` or `fgets`/`sscanf` failed. You should use `fgets` once to read a line, then (somehow) parse it. (This *could* involve multiple `sscanf`.) Calling `fgets`/`sscanf` for every field is wrong. How is the format of your input defined? Do you have fixed column widths? What would happen with a long name, e.g. "Hadschi Halef Omar Ben Hadschi Abul Abbas Ibn Hadschi Dawuhd al Gossarah"? (literary character, https://en.wikipedia.org/wiki/Hadschi_Halef_Omar) – Bodo Jun 21 '22 at 13:50
  • @WeatherVane i don't mix fgets and fscanf, those are just my usual method to scan a file. I have already tried one of them but still it fail. I use TXT files because for practice. If there is 30 lines in the text file, so I must call fgets for 30 times too? – LearnerC Jun 21 '22 at 14:12
  • Seeing as they are all commented out it's hard to know what you are mixing. You do call `fgets` for every data item, but they are on the same line. Call it once and apply `sscanf` but without field delimiters you must know how many name components there are and scan them individually. The scan with `%s` stops at the first whitespace character. You tried `sscanf(buffer,"%[^\n]",file[x].name);` but the name is not followed by a newline. – Weather Vane Jun 21 '22 at 14:15
  • ...so the short answer is to think again about the file storage format, so you know where the fields begin and end. For example `1,Josh Broclin,175,83` and then use `strtok` to break up the line. – Weather Vane Jun 21 '22 at 14:18
  • 2
    If you are going to use `*scanf`, always use a width modifier on your conversion specifiers. Even if you limit the read with `fgets`, you can cause UB with `sscanf(buffer,"%[^\n]",&file[x].name);`. Instead, use `sscanf(buffer,"%19[^\n]", &file[x].name);`. – William Pursell Jun 21 '22 at 14:31

1 Answer

1

Reiterating and building upon the comments, the primary problem here is allowing spaces in a field but also using spaces as delimiters between fields.

1   Josh Broclin                    175             83
-^^^----*-------^^^^^^^^^^^^^^^^^^^^---^^^^^^^^^^^^^--

(In this figure: - is data, ^ are delimiters, and * wants to be both.)

With fscanf, the %s specifier works by reading and ignoring leading whitespace characters, then reading as many non-whitespace characters as it can, until stopping when it encounters a trailing whitespace character.

After %s reads the J in the example above, the next whitespace character encountered will end the conversion. There is no special distinction between the spaces marked ^ and the space marked *.

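As for %[^\n]: the %[ specifier reads and stores every character that matches its scanset, so %[^\n] stops only when a newline character is encountered. On a line of this file it would therefore swallow the rest of the line - height and weight included - into the name.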

Note that using these specifiers without a field-width is as unsafe as gets, as they will not limit how many characters are read into the buffer. This is an easy way to overflow a buffer. A field-width is specified in the form %19s, and should be at most the size of the buffer minus one - always leaving room for the null-terminating byte.

A secondary problem is the use of fgets four times per iteration. This will attempt to read four separate lines of input, when really you want to read one line at a time.

Additionally, you do not check the return values of fgets or fscanf, meaning you may be operating on indeterminate values if these functions failed.

See here why while(!feof(fp)) is always wrong.


A quick and simple solution is to just treat the first and last names as separate fields in the file, and concatenate them in memory. snprintf can be used to safely determine if concatenating the two names resulted in truncation of the resulting string.

This will work for the data file you have shown.

#include <stdio.h>
#include <stdlib.h>

#define MAX_PEOPLE 100

struct person {
    char name[20];
    int number;
    int height;
    float weight;
};

/*
 * Parse one whitespace-separated record of the form
 *
 *     <number> <first-name> <last-name> <height> <weight>
 *
 * from `input` into `*p`. The two name words are read separately (at most
 * 19 characters each) and stored in p->name joined by a single space.
 *
 * Returns 1 when all five fields converted and the joined name fits in
 * p->name; returns 0 otherwise. On failure the numeric members of `*p` may
 * already have been overwritten, and p->name may hold a truncated name.
 */
int parse(struct person *p, char *input)
{
    char given[20];
    char family[20];
    int needed;

    if (sscanf(input, "%d%19s%19s%d%f",
               &p->number, given, family, &p->height, &p->weight) != 5)
        return 0;

    needed = snprintf(p->name, sizeof p->name, "%s %s", given, family);

    /* snprintf reports the length it wanted to write: a negative value is an
       error, a value >= the buffer size means the name was truncated. */
    if (needed < 0)
        return 0;

    return (size_t) needed < sizeof p->name;
}

/*
 * Usage: PROGRAM FILENAME
 *
 * Reads FILENAME, discards its first line (the column header), parses each
 * following line with parse() and keeps the lines that parse successfully
 * (at most MAX_PEOPLE of them). The stored records are then printed one per
 * line. Exits with EXIT_FAILURE if no file name is given or the file cannot
 * be opened.
 */
int main(int argc, char **argv)
{
    struct person people[MAX_PEOPLE] = { 0 };
    char line[512];
    size_t count = 0;
    size_t idx;
    FILE *in;

    if (argc < 2) {
        fprintf(stderr, "usage: %s FILENAME\n", argv[0]);
        return EXIT_FAILURE;
    }

    in = fopen(argv[1], "r");
    if (in == NULL) {
        perror("fopen");
        return EXIT_FAILURE;
    }

    /* The first line is the column header; read it and throw it away. */
    (void) fgets(line, sizeof line, in);

    /* Lines that fail to parse are skipped: the slot is simply reused. */
    while (count < MAX_PEOPLE && fgets(line, sizeof line, in) != NULL) {
        if (parse(&people[count], line))
            count++;
    }

    fclose(in);

    for (idx = 0; idx < count; idx++) {
        printf("%d - <<%s>> H:%d W:%f\n",
               people[idx].number,
               people[idx].name,
               people[idx].height,
               people[idx].weight);
    }

    return 0;
}

While simple, the solution above does impose the requirement that people have exactly two names - something that does not hold true for everyone. Using comma-separated values would be a more robust solution.

For the file:

1,Josh Broclin,175,83
2,Ryan Andreas,184,98
3,Tom Norton,162,111.6
4,Harry Syd,190,68       
5,Hayu Beurang,181,75    
6,Jeff Rick,169,108      
7,Asley Thomas,179,104
8,This Person Has Many Names,200,100

Here is a cursory CSV parser, using strtok:

#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAX_PEOPLE 100
#define EXPECTED_FIELDS 4
#define DEL ",\r\n"

/* One parsed record of the CSV file (number,name,height,weight). */
struct person {
    char name[64];   /* full name field, copied verbatim; at most 63 characters plus '\0' */
    int number;      /* first field: record number */
    int height;      /* third field: height (integer) */
    float weight;    /* fourth field: weight (may be fractional, e.g. 111.6) */
};

/* Signature shared by the per-field parsers: convert the text of one field and
 * store it in the given person record. parse() treats a zero return value as
 * failure and any nonzero value as success. */
typedef int (*parser_action)(struct person *, char *);

/*
 * Convert the decimal integer in the string `s` and store it in `*d`.
 *
 * Leading whitespace and an optional sign are accepted, as is trailing
 * whitespace (so padded fields such as "68   " still parse). The field is
 * rejected when it
 *   - contains no digits at all (including the empty string),
 *   - has any other characters after the number (e.g. "12abc"), or
 *   - does not fit in an int.
 *
 * strtol is used instead of sscanf("%d") because the latter has undefined
 * behavior when the value is out of range and silently ignores trailing
 * garbage.
 *
 * Returns 1 on success; returns 0 on failure and leaves `*d` untouched.
 */
int parse_int(int *d, char *s)
{
    char *end;
    long value;

    errno = 0;
    value = strtol(s, &end, 10);

    /* No digits were consumed. */
    if (end == s)
        return 0;

    /* Out of range for long (ERANGE) or for int. */
    if (errno == ERANGE || value < INT_MIN || value > INT_MAX)
        return 0;

    /* Only whitespace may follow the number. */
    while (isspace((unsigned char) *end))
        end++;

    if (*end != '\0')
        return 0;

    *d = (int) value;
    return 1;
}

/* Field parser for the record number: converts `field` with parse_int() and
 * stores the result in p->number. Returns whatever parse_int() returns. */
int parse_number(struct person *p, char *field)
{
    int *dest = &p->number;

    return parse_int(dest, field);
}

/* Field parser for the name: copies `field` into p->name.
 * Returns 1 if the whole field fit; returns 0 if it had to be truncated
 * (p->name then holds the truncated text) or if snprintf reported an error. */
int parse_name(struct person *p, char *field)
{
    int needed = snprintf(p->name, sizeof p->name, "%s", field);

    /* A negative result is an snprintf error; a result >= the buffer size
       means the name did not fit. */
    if (needed < 0)
        return 0;

    return (size_t) needed < sizeof p->name;
}

/* Field parser for the height: converts `field` with parse_int() and stores
 * the result in p->height. Returns whatever parse_int() returns. */
int parse_height(struct person *p, char *field)
{
    int *dest = &p->height;

    return parse_int(dest, field);
}

/* Field parser for the weight: reads a floating-point value from `field`
 * into p->weight. Returns 1 if a value was converted, 0 otherwise. */
int parse_weight(struct person *p, char *field)
{
    int converted = sscanf(field, "%f", &p->weight);

    return converted == 1;
}

int parse(struct person *p, char *input)
{
    parser_action field_actions[EXPECTED_FIELDS] = { parse_number, parse_name, parse_height, parse_weight };

    size_t i = 0;
    char *tok = strtok(input, DEL);

    while (i < EXPECTED_FIELDS && tok) {
        if (!field_actions[i++](p, tok))
            return 0;

        tok = strtok(NULL, DEL);
    }

    return EXPECTED_FIELDS == i;
}

/*
 * Usage: PROGRAM FILENAME
 *
 * Reads the CSV file FILENAME line by line, parses each line with parse() and
 * keeps the lines that parse successfully (at most MAX_PEOPLE of them). The
 * stored records are then printed one per line. Exits with EXIT_FAILURE if no
 * file name is given or the file cannot be opened.
 */
int main(int argc, char **argv)
{
    struct person people[MAX_PEOPLE] = { 0 };
    char line[512];
    size_t count = 0;
    size_t idx;
    FILE *in;

    if (argc < 2) {
        fprintf(stderr, "usage: %s FILENAME\n", argv[0]);
        return EXIT_FAILURE;
    }

    in = fopen(argv[1], "r");
    if (in == NULL) {
        perror("fopen");
        return EXIT_FAILURE;
    }

    /* Lines that fail to parse are skipped: the slot is simply reused. */
    while (count < MAX_PEOPLE && fgets(line, sizeof line, in) != NULL) {
        if (parse(&people[count], line))
            count++;
    }

    fclose(in);

    for (idx = 0; idx < count; idx++) {
        printf("%d - <<%s>> H:%d W:%f\n",
               people[idx].number,
               people[idx].name,
               people[idx].height,
               people[idx].weight);
    }

    return 0;
}
Oka
  • 23,367
  • 6
  • 42
  • 53
  • Wow, sir I really didn't expect the solution will be complex like that... This is literally outside my understanding. My lecturer never explains or ever mention this in my first year of college... Fantastic sir. I literally never think that the solution to this problem will be that complex, if there is a much simpler solution that I can fully understand that would be very appreciated... Btw, Thank you so much for the solution! @Oka – LearnerC Jun 21 '22 at 19:48
  • Is there something specific I can clarify? The code in the second example is somewhat advanced, but the takeaway should be that trying to allow the *delimiter* to appear in the data *fields* defeats the purpose of the delimiter. What if you were to add a *Place of Birth* field to your file? Depending on where you put the field, the data becomes very tedious, if not impossible to parse. Consider `7 Asley Thomas Los Angeles 179 104` and `4 Tom John Kingston Paris 162 123.6` - where do the names end and the locations begin? Using a delimiter that cannot appear in a field solves this easily. – Oka Jun 21 '22 at 23:58