1

I've been trying to figure this one out for a while now, and I feel like I have to be close. Basically, I have a data file containing various country records separated by new lines. Each record contains comma separated fields, of which I am trying to extract certain ones.

For example (as a single line):

60,AFG,Afghanistan,Asia,Southern and Central Asia,652090,1919,22720000,45.9,5976,Afganistan/Afqanestan,Islamic Emirate,Mohammad Omar,1,AF

Each one of these lines will make up a struct. Essentially, I want to read each one of these lines and insert it into an array of struct pointers (so dynamically). I also only want specific fields. When I "tokenize" the line I want the fields for code, name, population, and life expec. respectively:

AFG, Afghanistan, 22720000, 45.

My thought was to use fgets() to read each line in the file, and in a loop malloc() some memory for the pointers, tokenize on the fields I want, then insert. However, something that I'm doing must be wrong, as various tests don't seem to show anything in my output.

Here is my work thus far. I would appreciate any and all help.

#include "allheaders.h" // contains all common headers for personal use

#define BUF_SIZE 512
#define NUM_RECS 238

typedef struct {
   char code[4];
   char name[40];
   int population;
   float lifeExpectancy;
} Country;

typedef Country *countryPtr;

int main( int argc, const char* argv[] ) {

/* Opening the file */
FILE *filePtr;  // pointer to file
if ((filePtr = fopen("AllCountries.dat", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
}

/* Reading the file */
char buffer[BUF_SIZE];  // buffer to read
int index = 0;
char *token;
countryPtr *myCountries = malloc(sizeof(*myCountries) * NUM_RECS);
for(int i = 0; i < NUM_RECS; ++i) {
    myCountries[i] = malloc(sizeof(*myCountries[i]));
}

while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {

    token = strtok(buffer,",");
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->code, token);
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->name, token);
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    myCountries[index]->population = atoi(token);
    token = strtok(NULL, ",");
    myCountries[index]->lifeExpectancy = atof(token);
    //printf("%s", buffer);
    index++;
}

printf("%s", myCountries[1]->code); // test?
free(myCountries);

}

nrh
  • 13
  • 4
  • 1
    Suggest you do some basic debugging. With a debugger or even basic debug print statements. For example, inspect `buffer` and `token` after every line in the loop. – kaylum Jan 31 '17 at 04:07
  • For starters `index` is uninitialised. Something you should be able to easily spot with some basic debugging as suggested. – kaylum Jan 31 '17 at 04:07
  • Now that you have fixed the `index` problem, the next issue is that your `code` buffers are not large enough; `char code[3];`. Strings in C are NUL terminated. Thus to store a 3 letter string you need 4 `char`s. So you have buffer overflows resulting in Undefined Behaviour. – kaylum Jan 31 '17 at 04:16
  • Okay, it's edited. – nrh Jan 31 '17 at 04:26

2 Answers2

1

Have a look at the following. In the first instance you will need to do some work to improve the areas marked NYI

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>

#define BUF_SIZE 512
#define NUM_RECS 238

typedef struct {
  char code[4]; // NYI - magic numbers
  char name[41]; // NYI - magic numbers
  int population; // NYI - what if atoi fails? 
  float lifeExpectancy; // NYI - what if atof fails?
} Country;

typedef Country* countryPtr;

int main( int argc, const char* argv[] ) {
  /* Opening the file */
  FILE *filePtr;  // pointer to file
  if ((filePtr = fopen("a.txt", "r")) == NULL) {   // if couldn't open file
    printf("Error opening file\n"); // error message
    exit(1);
  }

  /* Reading the file */
  char buffer[BUF_SIZE];  // buffer to read
  int index=0;
  char *token; // NYI - initial value
  countryPtr* myCountries = calloc(NUM_RECS, sizeof(countryPtr));
  for(int i = 0; i < NUM_RECS; ++i) {
    myCountries[i] = calloc(1, sizeof(Country));
  }

  while (fgets(buffer, BUF_SIZE, filePtr) != NULL) {
    // NYI - magic lengths / overflow strcpy targets

    token = strtok(buffer,","); // NYI - This is probably not the best way to do this. At least fold into a loop.
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->code, token);
    token = strtok(NULL, ",");
    strcpy(myCountries[index]->name, token);
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");
    token = strtok(NULL, ",");

    myCountries[index]->population = atoi(token); // NYI - atoi failure
    token = strtok(NULL, ",");
    myCountries[index]->lifeExpectancy = atof(token); // NYI - atof failure
    printf("%s", buffer);
    index++;
  }

  printf("%s\n", myCountries[0]->code); // test? NYI - need more proof
  free(myCountries); // NYI - this is a sequence - need to free each of the new elements 
}
london-deveoper
  • 533
  • 3
  • 8
  • You're right about 'magic numbers'. Should be constants. You're right about `atoi() and atof()` failing. I couldn't address everything in my answer but [this](http://stackoverflow.com/q/22865622/2079103) covers the numeric conversion failure issue. Good to handle that properly too. – clearlight Jan 31 '17 at 05:20
0

I took a different approach to solving it based on your code and data file. I tested it. It works with a file of the record type you showed. Hopefully it will explain some things and make your work easier and give you a good place to work from.

I don't like to write programs in a way that has to pre-count (time consuming) or pre-know the number of records in a file on general principles except maybe in rare cases. So when reading files I prefer to allocate memory as I go. Now if there's a big file and a lot of data, then you have to come up with a better memory management scheme than to keep it all in memory. At some point you're better off going with a canned db solution of some sort. MySQL, an API, library, parser, etc... but this should work for small files.

Usually in C on UNIX, exit(0) means success, exit(-1) means failure. Also since your country codes were 3 characters, the field to hold it has to be at least 4 characters for the trailing '\0'

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <strings.h>

#define MAXRECL   512
#define MAXFIELDS 100
#define MAXFIELDL 80

// Field indicies

#define COUNTRY_CODE    1
#define COUNTRY_NAME    2
#define POPULATION      7
#define LIFE_EXPECTANCY 8

#define CCMAX           3
#define CNMAX           40

typedef struct Country {
   struct Country *next;
   char  code[CCMAX + 1];  // (Need room for trailing '\0')
   char  name[CNMAX + 1];  // (Need room for trailing '\0')
   int   population;
   float lifeExpectancy;
} country_t;

country_t *countryRecords;

int main( int argc, const char* argv[] ) {

    FILE *fp;
    if ((fp = fopen("AllCountries.dat", "r")) == NULL) {  
        printf("Error opening file\n"); 
        exit(-1);
    }
    int totalCountries = 0;
    char buf[MAXRECL];
    char fields[MAXFIELDS][MAXFIELDL];
    country_t *prev_country = NULL;
    while (fgets(buf, MAXRECL, fp) != NULL) {
        ++totalCountries;      
        country_t *country = calloc(sizeof(struct Country), 1);
        if (country == NULL) {
            fprintf(stderr, "Out of memory\n");
            exit(-1);
        }
        char *field = strtok(buf, ",");
        int i = 0;
        while(field != NULL) {
          strncpy(fields[i++], field, MAXFIELDL);
          field = strtok(NULL, ",");
        }        
        strcpy(country->code, fields[COUNTRY_CODE]);
        strcpy(country->name, fields[COUNTRY_NAME]);
        country->population = atoi(fields[POPULATION]);
        country->lifeExpectancy = atof(fields[LIFE_EXPECTANCY]);

        if (countryRecords == NULL)
            countryRecords = country;
        else 
            prev_country->next = country;
        prev_country = country;  
    }
    printf("Total countries: %d\n", totalCountries);

    country_t *country = countryRecords;
    while(country != NULL) {
        printf("%3s %30s  Population: %7d Life Expectancy: %5.2f\n",
            country->code, country->name, country->population, country->lifeExpectancy); 
        country_t *prev_country = country;
        country = country->next;
        free(prev_country);
    }
    printf("Done\n");
    exit(0);
}
clearlight
  • 12,255
  • 11
  • 57
  • 75
  • Also I should mention, I used `calloc()` instead of `malloc()` because `calloc()` zeroes the memory before it returns it to you, `malloc()` doesn't. – clearlight Jan 31 '17 at 05:33