0

Please excuse any mistakes made in this post, as this is my first post here, but do point them out.

I'm still quite new to C, but I'm trying to implement a dictionary from a txt file that has one word in each line through a hash table but whenever I try to search for a word it's never found, though it exists in the file.

Can you please help me figure out what I'm doing wrong?

Both following files:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define tab_size 29                  /* table size */
#define word_len 24                  /* max length of word */

.h file:

/* structure to be used for each word */

typedef struct list {
      char *word;                  
      struct list *next;         
} WORD;

.c file:

/* create table */ 

WORD **create_tab(int size) {
       int i = 0;
       WORD **hash_tab;
       hash_tab = malloc(sizeof(*hash_tab) * size);    /*allocate memory */
       for (; i<size; i++)              /* initialize elements */
              hash_tab[i]=NULL;
return hash_tab;
}


/* search for word in table; returns 1 if found, 0 otherwise */

int search(WORD **tb, char *w) {
      WORD *htmp/*, *hprv*/;
      unsigned long hash = (hash_f(w) % tab_size);     /* hash_f is Dan Bernstein's hash function; modulo by tab_size to make sure it fits */  
      for (htmp = tb[hash]; (htmp != NULL) && (strcmp(htmp->word, w) != 0); / htmp = htmp->next)      /* follow chained array of respective cell until word is found or until the end */
      {
        ;
      }
      if (htmp == NULL) return 0;
return 1;
}


/* insert new WORD with word w at the beginning of the chained array of the respective cell */

void insert(WORD **ht, char *w) {
   WORD *htmp;
   unsigned long hash = (hash_f(w) % tab_size);     /* hash_f is Dan Bernstein's hash function; modulo by tab_size to make sure it fits */
   htmp = malloc( sizeof(*htmp) );      /* allocate memory for new WORD */
   htmp->word = calloc(word_len+1,sizeof(char));    /* allocate memory for new word with  max word length plus one character to make sure there's no buffer overflow in the next line*/
   strncpy(htmp->word, w, word_len);             /* copy w to word */  
   htmp->next = ht[hash];                   /* new WORD now points to content of the respective table cell */
   ht[hash] = htmp;                /* table cell now points to new WORD */
}

/* receive empty table and create the whole dictionary in memory word by word */

WORD **make(WORD **dic;) {  
      char w[word_len];         
      FILE *dictionary;    
      dictionary = fopen("dictionary.txt","r");                   
      while ((fgets( w, word_len, dictionary )) != NULL)    
          insert(dic, w);                    
      fclose(dictionary);  
return dic;
}



int main() {
     WORD **dic;
     char w[word_len];
     dic = create_tab(tab_size);     /* create the table */
     dic = make(dic);                   /* insert all entrys of dictionary in table */
     printf("Insert a word in lowercase: \n"); 
     if ((scanf("%s",w)) == 0) return 0;    /* if I didn't somehow verify the scanf it would return an error */
     else if (search(dic, w) == 1) printf("The word %s exists in the dictionary \n",w);
          else printf("The word %s does not exist in the dictionary",w);
return 0;
}

I've tried several aproaches, some based on the following links, but this always happens. This version is based on the last one.

Quick Way to Implement Dictionary in C http://www.sparknotes.com/cs/searching/hashtables/section3.rhtml http://www.seg.rmit.edu.au/code/zwh-ipl/hashtable.c

Community
  • 1
  • 1

1 Answers1

1

Problem 1

This is not right.

   hash_tab = malloc(sizeof(WORD) * size);    /*allocate memory */

It should be

   hash_tab = malloc(sizeof(WORD*) * size);    /*allocate memory */

To avoid errors like this, you should get in the habit of using

   hash_tab = malloc(sizeof(*hash_tab) * size);    /*allocate memory */

Problem 2

  for (; i<tab_size; i++)              /* initialize elements */
          hash_tab[i]=NULL;

should be

  for (; i<size; i++)              /* initialize elements */
          hash_tab[i]=NULL;

This is a potential problem. It may not be a real problem in your case if size and tab_size happens to be equal. But as a matter of style, you should use the second block of code.

R Sahu
  • 204,454
  • 14
  • 159
  • 270
  • 1
    Thank you. I still have difficulty working with pointers. I'll keep this in mind :) – user3693508 May 31 '14 at 09:27
  • I would like to recommend that the loop should be either `for (i = 0; i < size; i++)` or (even better if your compiler supports C99 or C11), use `for (int i = 0; i < size; i++)`. You might not need the separate declaration of `i` with the latter loop. Put all the loop controls in the loop, rather than relying on a distant initialization of your loop control variable. – Jonathan Leffler May 31 '14 at 21:16
  • @JonathanLeffler Thank you for the tip. That was based on a misconception. I'd seen a step by step example of code optimization in C and misunderstood what it said should be done before a `for` rather than in it. – user3693508 Jun 01 '14 at 08:56