0

My goal is to read from test.txt, then output its contents. However, the thing is, I get stuck in the sscanf loop. So it keeps reading Australia over and over again

test.txt

    Australia   Sydney Perth Brisbane
    USA  California Los-Angeles Silicon-Valley Dallas
    Canada  Toronto

Expected output

Country: Australia
Cities: Sydney Perth Brisbane
---------------
Country: USA
Cities: California Los-Angeles Silicon-Valley Dallas
---------------
Country: Canada
Cities: Toronto
---------------

My code

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define MAX 2000

/*
 * Reads test.txt line by line and prints the first whitespace-delimited word
 * of each line as the country, followed by a "Cities:" list and a separator.
 */
int main (void) {

   FILE *fp = fopen("test.txt" ,"r");   /* NOTE(review): result is not checked for NULL */
   char buf[MAX + 1];
   char country[MAX];
   char city[MAX];

   /* fgets stores at most MAX - 1 characters of the current line in buf */
   while (fgets(buf, MAX, fp) != NULL) {

      /* NOTE(review): return value is ignored, so on a blank line country
         is printed without having been (re)assigned */
      sscanf(buf, "%s", country);
      printf("Country: %s\n", country);

      printf("Cities:");
      /* NOTE(review): every call scans from the start of buf again, so it
         keeps matching the same first word and the loop never advances */
      while (sscanf(buf, "%s", city) == 1) {
         printf(" %s", city);
      }

      printf("\n---------------\n");
   }
}
James Coles
  • 51
  • 1
  • 5

3 Answers3

1

You enter an infinite loop because you are attempting to parse every city from the same place you parsed country -- the beginning of buf. To use sscanf to incrementally parse whitespace-separated strings from buf, you need to additionally use the "%n" conversion specifier to obtain the number of characters (nchar below) consumed by sscanf on each read. You can then add that to an offset (off below) to successively parse each city from buf after parsing the country.

The approach is straight-forward, use sscanf with the "%s%n" format string to parse the whitespace delimited string into an array saving the number of characters read/consumed by sscanf in an integer variable. For example:

    while (fgets (buf, MAXC, fp)) {             /* read each line */
        int nchar = 0;      /* characters consumed by the latest sscanf */
        char cc[MAXC] = ""; /* buffer for country/city */
        /* parse country; compare against 1 because sscanf returns EOF
           (non-zero) on a blank line, which a bare truth test would
           treat as a successful conversion */
        if (sscanf (buf, "%s%n", cc, &nchar) == 1) {
            int off = nchar;                    /* offset just past the country */
            printf ("%s\n", cc);
            /* read each city, advancing the offset by the characters consumed */
            while (sscanf (buf + off, "%s%n", cc, &nchar) == 1) {
                printf ("  %s\n", cc);
                off += nchar;
            }
        }
    }

Above buf + off provides the location in buf to begin parsing each city. Also note the use of "%n" does NOT increase the conversion count (e.g. the sscanf return).

Complete example:

#include <stdio.h>

#define MAXC 2048   /* good use of constants, but avoid the common name MAX */

/*
 * Reads lines from the file named by the first argument (stdin when no
 * argument is given) and prints the first whitespace-delimited word of each
 * line (the country) followed by every remaining word (the cities), each
 * city indented by two spaces. Blank lines are skipped.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    char buf[MAXC] = "";
    /* use filename provided as 1st argument (stdin by default) */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        perror ("file open failed");
        return 1;
    }

    while (fgets (buf, MAXC, fp)) {             /* read each line */
        int nchar = 0;      /* characters consumed by the latest sscanf */
        /* cc is as large as buf, so an unbounded %s cannot overflow it */
        char cc[MAXC] = ""; /* buffer for country/city */
        /* parse country; compare against 1 because sscanf returns EOF
           (non-zero) on a blank line, which a bare truth test would
           treat as a successful conversion */
        if (sscanf (buf, "%s%n", cc, &nchar) == 1) {
            int off = nchar;                    /* offset just past the country */
            printf ("%s\n", cc);
            /* read each city, advancing the offset by the characters consumed */
            while (sscanf (buf + off, "%s%n", cc, &nchar) == 1) {
                printf ("  %s\n", cc);
                off += nchar;
            }
        }
    }

    if (fp != stdin) fclose (fp);   /* close file if not stdin */

    return 0;
}

Example Use/Output

$ ./bin/rdcountrycity <dat/countrycity.txt
Australia
  Sydney
  Perth
  Brisbane
USA
  California
  Los-Angeles
  Silicon-Valley
  Dallas
Canada
  Toronto

While using sscanf to parse the country and cities from each line of text is fine, there is a tool better suited for the job, e.g. strtok, which is used to tokenize a string into tokens based on the delimiters you provide. You can provide delimiters of " \t\n" (space, tab, newline) to simply parse each whitespace-delimited word from each line.

It's actually much simpler, e.g.

#include <stdio.h>
#include <string.h>

#define MAXC 2048       /* good use of constants, but avoid the common name MAX */
#define DELIM " \t\n"   /* you can define string constants too */

/*
 * Splits every input line into whitespace-delimited words with strtok.
 * The first word of a line is printed as-is; each following word is
 * printed on its own line with a two-space indent. Input comes from the
 * file named by argv[1], or from stdin when no argument is supplied.
 *
 * Returns 0 on success, 1 if the file cannot be opened.
 */
int main (int argc, char **argv) {

    char line[MAXC] = "";
    FILE *in = stdin;

    /* a file name on the command line replaces stdin as the input */
    if (argc > 1)
        in = fopen (argv[1], "r");

    if (in == NULL) {
        perror ("file open failed");
        return 1;
    }

    while (fgets (line, MAXC, in) != NULL) {
        /* first token on the line, if any */
        char *tok = strtok (line, DELIM);

        if (tok == NULL)
            continue;               /* nothing but delimiters on this line */

        printf ("%s\n", tok);

        /* remaining tokens; strtok continues from its saved position */
        for (tok = strtok (NULL, DELIM); tok != NULL; tok = strtok (NULL, DELIM))
            printf ("  %s\n", tok);
    }

    if (in != stdin)
        fclose (in);

    return 0;
}

(the output is the same)

(note: strtok modifies the original string, so you will need to make a copy of buf to preserve the original if required)

Look things over and let me know if you have any further questions.

David C. Rankin
  • 81,885
  • 6
  • 58
  • 85
0

You need to have an integer offset, call it off, then pass buf + off as the first argument of sscanf instead of just buf:

printf("Cities:");

int off = 0;
while (sscanf(buf + off, "%s", city) == 1) {
    printf(" %s", city);
    off += strlen(city);
}

Or, written as a for loop:

int off;
for (off = 0; sscanf(buf + off, "%s", city) == 1; off += strlen(city))
    printf(" %s", city);
lost_in_the_source
  • 10,998
  • 9
  • 46
  • 75
0

Your code has a buffer overflow, which is dangerous.

When you read from a long line, the line may not end in \n. If that line doesn't contain any whitespace, the city buffer cannot hold the complete line.

Roland Illig
  • 40,703
  • 10
  • 88
  • 121