0

I'm trying to read a csv file that contains 9 columns each having information about a person. I'm supposed to store the data in a hash table and create a function to look up data based on the surname. This is my code

#include<stdio.h>
#include<stdlib.h>
#include <string.h>
#include <ctype.h>
#define tablesize 27
#define entries 21


unsigned long int collisions = 0;

typedef struct {
    char id[20];
    char depid[10];
    char surname[20];
    char forename[20];
    char age[2];
    char ptype[20];
    char gender[6];
    char nation[20];
    char religion[20];
    char occupation[20];
}dict;

dict* hashTable[tablesize]= {NULL};

unsigned long int hash_function(char* s){
    unsigned long int hash = 0;
    while(*s){
        hash = hash + *s;
        s++;
    }
    return hash%tablesize;
}

void print_table(){
    for(unsigned long int i=0;i<tablesize;i++){
        if(hashTable[i]==NULL){
            printf("%d\t---\t---\n",i);
        }
        else{
            printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",i,hashTable[i]->id,hashTable[i]->depid,hashTable[i]->surname,hashTable[i]->forename,hashTable[i]->age
            ,hashTable[i]->ptype,hashTable[i]->gender,hashTable[i]->nation,hashTable[i]->religion,hashTable[i]->occupation);
        }
    }
}

void insert(dict *d){
    unsigned long int ind = hash_function(d->surname);
    for(unsigned long int i=0;i<tablesize;i++){
        unsigned long int try = (ind+i)%tablesize;
        if(hashTable[try]==NULL){
            hashTable[try] = d;
            return;
        }
        else{
            collisions++;
        }
    }
}

void printvalues(unsigned long int i){
        printf("%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",i,hashTable[i]->id,hashTable[i]->depid,hashTable[i]->surname,hashTable[i]->forename,hashTable[i]->age
        ,hashTable[i]->ptype,hashTable[i]->gender,hashTable[i]->nation,hashTable[i]->religion,hashTable[i]->occupation);
}

void search(char* name){
    unsigned long int ind = hash_function(name);
    unsigned long int f=1;
    for(unsigned long int i=0;i<tablesize;i++){
        unsigned long int try = (ind+i)%tablesize;
        if(hashTable[try]!=NULL&&strcmp(hashTable[try]->surname,name)==0){
            printvalues(try);
            f=0;
        }
    }
    if(f==1)
        printf("%s not in table\n",name);
    return;
}

int main(){
    FILE *fp = fopen("truncated.csv","r");
    if(!fp){
        printf("Error");
        return 0;
    }
    char buff[1024];
    unsigned long int row = 0, column = 0;
    dict values[entries];
    unsigned long int i=0;
    while(fgets(buff,1024,fp)){
        column=0;
        row++;
        if(row==1){
            continue;
        }
        char *field = strtok(buff,",");
        while(field){
            if(column==0){
                strcpy(values[i].id,field);
            }
            if(column==1){
                strcpy(values[i].depid,field);
            }
            if(column==2){
                strcpy(values[i].surname,field);
            }
            if(column==3){
                strcpy(values[i].forename,field);
            }
            if(column==4){
                strcpy(values[i].age,field);
            }
            if(column==5){
                strcpy(values[i].ptype,field);
            }
            if(column==6){
                strcpy(values[i].gender,field);
            }
            if(column==7){
                strcpy(values[i].nation,field);
            }
            if(column==8){
                strcpy(values[i].religion,field);
            }
            if(column==9){
                strcpy(values[i].occupation,field);
            }
            field = strtok(NULL,",");
            column++;
        }
        i++;
    }
    fclose(fp);
    for(unsigned long int i=0;i<entries;i++){
        insert(&values[i]);
    }
    //printvalues(values);
    //print_table();
    while(1){
        printf("Enter term to get frequency or type 'quit' to escape:");
        char name[20];
        scanf("%s",name);
        if(strcmp(name,"quit")==0)
            return 0;
        search(name);
    }
    return 0;
}

The problem I'm facing is that i have two csv files, one containing 60000 entries and one containing only 21 entries. When i read the smaller file, the code works just fine. But i am getting no output whatsoever for the bigger file. Any ideas? Thanks in advance.

Sukhman
  • 11
  • 2

1 Answers1

0

In your code, you have only space for 21 entries (line #define entries 21)

So you'll parse 22th line and more in the big file, you'll try to write into a forbidden place. From this point, you enter in UB zone.

Solution: make values dynamic.


int count = 0;
dict *values = NULL;
 
while(fgets(buff,1024,fp)){
  ++count;
  dict * tmp = realloc(values, sizeof (dict) * count);
  if (NULL == tmp) {
      perror("realloc");
      free(values);
      exit(1);
  } else {
      values = tmp;
  }

and after the while loop:

free(values);
Mathieu
  • 8,840
  • 7
  • 32
  • 45
  • 1
    The construct being used here for `realloc` is a potential memory leak. Use a different pointer to get the new memory each time: `temp = realloc(values, sizeof (dict) * count);` `if (temp) {values = temp...`, if not, `free(values);` and error out. – ryyker Oct 12 '22 at 12:41
  • Worked like a charm. Thanks a lot. Also, how do i free up the dynamically allocated space? – Sukhman Oct 12 '22 at 12:47
  • 1
    Take a look at this page for how to [properly use realloc()](https://stackoverflow.com/a/21006798/645128) – ryyker Oct 12 '22 at 12:48
  • 1
    FWIW - to reduce confusion, it would improve your answer by just including the second code example, and completely removing the first one. Be sure to include the variable declarations of the first section though. – ryyker Oct 12 '22 at 13:00