1

I'm struggling with a problem in C and I can't figure what is happening. I'm making a tree to sort words depending on the frequency of letters (e.g. "cab" is "1a 1b 1c"). Here is my code :

#define M 8 //maximal number of appearances of a letter
#define LAST_LETTER 25 //number of last letter
#define bzero(b,len) (memset((b), '\0', (len)), (void) 0)
static const char* alphabet = {"abcdefghijklmnopqrstuvwxyz"};


//TODO: define list structures

typedef struct word word;
struct word {
    char* _word;
    word *next;
};

typedef struct list list;
struct list {
    word *first;
};

typedef struct dict dict;
struct dict {
        dict *children[M];
        list *words[M];
};

//returns an empty list
list * list_new() {
    list *l = (list*) malloc(sizeof(list));
    word *w = (word*) malloc(sizeof(word));
    if (l == NULL || w == NULL) {
        printf("could not create list : memory allocation failed\n");
        return;
    }
    w->_word = NULL;
    w->next = NULL;
    l->first = w;
    return l;
}

//append word at end of list
void list_append(list *l, char *w) {
    // create a new word
    word *new_word = malloc(sizeof(word));
    if (l == NULL || new_word == NULL) {
        printf("could not append word to list : list is empty\n");
        return;
    }
    new_word->_word = malloc(strlen(w) + 1);
    strcpy(new_word->_word, w);
    new_word->next = NULL;
    //insert the word
    if (l->first->_word == NULL) {
        l->first->_word = new_word->_word;
    }
    else {
        //word *temp = malloc(sizeof(word));
        word *temp;
        temp = l->first;
        while(temp->next != NULL) {
            temp=temp->next;
        }
    temp->next = new_word;
    }
}

//print word list
void list_print(list *l) {
    if (l == NULL || l->first == NULL) {
        printf("could not print list : list is empty\n");
        return;
    }
    word *current = l->first;
    while (current != NULL) {
        printf("%s -> ", current->_word);
        current = current->next;
    }
    printf("NULL\n");
}

char *compute_signature(const char *word) {
        char *signature = (char*) malloc(26);
        memset((void*)signature, 0, 26);
        int i = 0, j = 0, n = 0;
        char current_letter, letter;
    for (i = 0; i < 26; i++) {
            current_letter = alphabet[i];
                n = 0;
                for (j = 0; j < (int) strlen(word); j++) {
                    letter = word[j];
                        if (letter == current_letter) {
                                n++;
                        }
                }
                signature[i] = (char) n;
        }
        return signature;
}


void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int j = 0;
    int steps = 0;
    int occur = 0;
    dict *temp = NULL;
    if (current_letter == strlen(w)-1) {
        printf("Word found : %s!\n",w);
        int i = 0;
        int different_letters = 0;
        for (i = 0; i < 26; i++) {
            if ((int) signature[i] > 0) {
                different_letters++;
            }
        }
        for (i = 0; i < 26; i++) {
            occur = (int) signature[i];
            if (occur > 0) {
                steps++;
                if (steps < different_letters) break;
            }
            else {
                if (temp == NULL) {
                    temp = d;
                }
                if (temp->children[occur] == NULL) {
                    temp->children[occur] = (dict*) malloc(sizeof(dict));
                    temp = temp->children[occur];
                }
            }
        }
        if (temp == NULL) {
            temp = d;
        }
        list *l = NULL;
        if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
            temp->words[occur] =  list_new();
            l = temp->words[occur];
        }
        else {
            l = temp->words[occur];
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        list_print(l);
        /*list_append(l,new);
        list_print(l);*/
    }
    else {
        printf("Current letter: %c.\n",w[current_letter]);
        dict_insert(d,signature,current_letter+1,w);
    }
}


dict * read_words(const char *file) {
    FILE* f = NULL;
    f = fopen(file, "r");
    if (f == NULL) {
        printf("Could not open file.\n");
        exit(EXIT_FAILURE);
    }
    char line[256];
    dict *d = (dict*) malloc(sizeof(dict));
    //bzero((void*)d, sizeof(dict));
    while (fgets(line, sizeof(line), f) != NULL) {
        if (line[strlen(line) - 1] == '\n') {
            line[strlen(line) - 1] = '\0';
        }
        char *new;
        new = malloc(strlen(line) + 1);
        strcpy(new, line);
        char *signature = compute_signature(new);
        dict_insert(d, signature, 0, new);
        free((void*)signature);
    }
    return d;
}


int main(int argc, const char* argv[]) {
    /*list *myList = list_new();
    list_print(myList);
    list_append(myList,"Word1");
    list_print(myList);
    list_append(myList,"Word2");
    list_print(myList);
    list_append(myList,"Word3");
    list_print(myList);*/
    dict *d = read_words("list");
    /*list_print(d->words[1]);
    list_print(d->children[0]->words[1]);
    list_print(d->children[0]->children[0]->words[1]);
    list_print(d->words[2]);
    list_print(d->children[0]->words[2]);*/
    return 0;
}

This code almost works. But not as intended: some words are misplaced in the tree, and sometimes the lists seems to be duplicated (I have not figured how exactly yet). Can you please help me clean this code (maybe pointing out the biggest mistakes^^) ?

Edit: code partially fixed with comments. I have a segfault on list_append(), at the line if (strcmp(l->first->_word, "") == 0).

Edit 2: When I pass a word in my algorithm, I need to test if the list exists or not: e.g. I add "doom" to the list corresponding to "1d 1m 2o", when I pass "mood" I want the algorithm to add it to the same list. Here is how I tried to achieve this:

if (temp == NULL) {
    temp = d;
}
list *l = NULL;
if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
    temp->words[occur] =  list_new();
    l = temp->words[occur];
}
else {
    l = temp->words[occur];
}

I have a segfault for temp->words[occur]->first == NULL...

Edit 3: my code is compiling but the words are not added to the right list. I think the issue lies in dict_insert() which declaration is :

void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int j = 0;
    int steps = 0;
    int occur = 0;
    dict *temp = NULL;
    if (current_letter == strlen(w)-1) {
        printf("Word found : %s!\n",w);
        int i = 0;
        int different_letters = 0;
        for (i = 0; i < 26; i++) {
            if ((int) signature[i] > 0) {
                different_letters++;
            }
        }
        for (i = 0; i < 26; i++) {
            occur = (int) signature[i];
            if (occur > 0) {
                steps++;
                if (steps == different_letters) break;
            }
            printf("%d RIGHT\n",occur);
            printf("1 DOWN\n");
            if (temp == NULL) {
                temp = d;
            }
            temp->children[occur] = (dict*) realloc(temp->children[occur], sizeof(dict));
            //temp = temp->children[occur]
        }
        printf("%d RIGHT\n",occur);
        if (temp == NULL) {
            temp = d;
        }
        if (temp->words[occur] == NULL) {
            list *l = list_new();
            temp->words[occur] = l;
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        list_print(temp->words[occur]);
        list_append(temp->words[occur],new);
        list_print(temp->words[occur]);
    }
    else {
        printf("Current letter: %c.\n",w[current_letter]);
        dict_insert(d,signature,current_letter+1,w);
    }
}

Edit 4: dict_insert() completely reworked.

void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
    int occur;
    occur = (int) signature[current_letter];
    if (current_letter == LAST_LETTER) {
        if (d->words[occur] == NULL) {
            d->words[occur] = list_new();
        }
        char *new;
        new = malloc(strlen(w) + 1);
        strcpy(new, w);
        printf("word found : %s!\n",w);
        list_print(d->words[occur]);
        list_append(d->words[occur],new);
        list_print(d->words[occur]);
    }
    else {
        if (d->children[occur] == NULL) {
            d->children[occur] = malloc(sizeof(dict));
        }
        d = d->children[occur];
        dict_insert(d,signature,current_letter+1,w);
    }
}

still a segfault on if (d->children[occur] == NULL)...

kh4r4
  • 65
  • 7
  • 1
    In `read_words` you use an uninitialized `size`. Try `sizeof line` instead (and get rid of the `size` variable): `while (fgets(line, sizeof line, f) != NULL) {`. Also, increase the warning level of your compiler and **mind the warnings**. – pmg Nov 25 '11 at 10:25
  • 1
    1) You don't need the list (it contains only a pointer to a word) 2) you don't need a word with a "" payload: a NULL pointer (for eather the string or the word) can carry exactly the same information. 3) the compute_signature function is highly inefficent. 4) instead of aclling strlen() multiple times it might be better to call it once and remember and reuse the result. 5) don't cast malloc()s returnvalue. – wildplasser Nov 25 '11 at 10:32
  • OK, I will work on that. I have added an "Edit 2" with a specific question on my algorithm. @pmg: Fixed. Thank you. – kh4r4 Nov 25 '11 at 12:08
  • If anyone could give me a clue about my edit 2 issue, it would be greatly appreciated – kh4r4 Nov 25 '11 at 15:14
  • I think I'm really close to the answer, but I must have made a big pointer mistake somewhere... – kh4r4 Nov 26 '11 at 13:30
  • Ad update#3: all of your insert code is in the "if (current_letter == strlen(w)-1)" part of the function. I don't think that is correct. Again: you don't need the list-structure; a pointer to word is just as good. TIP: try to separate the code into a) find place where node/word should be or should be inserted and b) update it or insert it at the found location. That would probably avoid a lot of the mess with temp and (temp==NULL). – wildplasser Nov 26 '11 at 13:54
  • @wildplasser I need the list structure because the dict structure has been given to me by a tutor. I have completely changed the way a word is added to the tree, after your comments. I still have `== NULL` issues... – kh4r4 Nov 27 '11 at 09:39

1 Answers1

3

In compute_signature you are trying to compare the current letter of the alphabet with the current letter of the word by

strcmp((const char*) &letter, (const char*) &current_letter) == 0

while letter and current_letter are char *. So you are pointing them somewhere into memory by letter = alphabet[i] and then taking the address of the pointer and sticking it into strcmp. I'm surprised it didn't crash in the original version.

letter and current_letter should be changed to type char instead of char * and your comparison should be if (letter == current_letter)

A few other remarks:

  1. You are casting the return value of malloc. While this won't necessarily break anything it's not a good idea.
  2. When computing the signature and using it you constantly cast between int and char. While it is unlikely you'll ever have a word with more than 127 occurences of the same letter you should consider changing signature to be an array of short or int
  3. You have a lot of casts in your code. Casts are a way of "forcing" the compiler to accept the type as you want it which you should only do if it is really necessary as it can lead to masking bugs. So whenever you write a cast ask yourself: Why do I have to do it and can I avoid it?
  4. In list_append you do

    word *temp = malloc(sizeof(word));
    temp = l->first;
    

    This creates a memory leak as you allocate some memory and then you forget about it by setting temp to the list head. Just get rid of the malloc it is not necessary. Same thing in dict_insert

    temp->words[occur] = (list*) malloc(sizeof(list));
    list *l = list_new();
    temp->words[occur] = l;
    

    Get rid of the malloc.

Community
  • 1
  • 1
ChrisWue
  • 18,612
  • 4
  • 58
  • 83
  • No, it is not, I must admit. But I have only a pointer exception so I can't say much more. – kh4r4 Nov 24 '11 at 18:41
  • @kh4r4: Sorry, I was briefly confused, fixed the explanation – ChrisWue Nov 24 '11 at 18:56
  • I do not understand what makes my program going rogue on me! Sometimes pointer exceptions, sometimes correct execution but wrong word lists... – kh4r4 Nov 24 '11 at 23:51
  • I used your comments to fix some problems and I changed a bit the algorithm. (I also changed my IDE for Netbeans 7). I now have a segfault error in `list_append()`. See "Edit" in my first post. – kh4r4 Nov 25 '11 at 10:14