I'm struggling with a problem in C and I can't figure what is happening. I'm making a tree to sort words depending on the frequency of letters (e.g. "cab" is "1a 1b 1c"). Here is my code :
#define M 8 //maximal number of appearances of a letter
#define LAST_LETTER 25 //number of last letter
#define bzero(b,len) (memset((b), '\0', (len)), (void) 0)
static const char* alphabet = {"abcdefghijklmnopqrstuvwxyz"};
//TODO: define list structures
typedef struct word word;
struct word {
char* _word;
word *next;
};
typedef struct list list;
struct list {
word *first;
};
typedef struct dict dict;
struct dict {
dict *children[M];
list *words[M];
};
//returns an empty list
list * list_new() {
list *l = (list*) malloc(sizeof(list));
word *w = (word*) malloc(sizeof(word));
if (l == NULL || w == NULL) {
printf("could not create list : memory allocation failed\n");
return;
}
w->_word = NULL;
w->next = NULL;
l->first = w;
return l;
}
//append word at end of list
void list_append(list *l, char *w) {
// create a new word
word *new_word = malloc(sizeof(word));
if (l == NULL || new_word == NULL) {
printf("could not append word to list : list is empty\n");
return;
}
new_word->_word = malloc(strlen(w) + 1);
strcpy(new_word->_word, w);
new_word->next = NULL;
//insert the word
if (l->first->_word == NULL) {
l->first->_word = new_word->_word;
}
else {
//word *temp = malloc(sizeof(word));
word *temp;
temp = l->first;
while(temp->next != NULL) {
temp=temp->next;
}
temp->next = new_word;
}
}
//print word list
void list_print(list *l) {
if (l == NULL || l->first == NULL) {
printf("could not print list : list is empty\n");
return;
}
word *current = l->first;
while (current != NULL) {
printf("%s -> ", current->_word);
current = current->next;
}
printf("NULL\n");
}
char *compute_signature(const char *word) {
char *signature = (char*) malloc(26);
memset((void*)signature, 0, 26);
int i = 0, j = 0, n = 0;
char current_letter, letter;
for (i = 0; i < 26; i++) {
current_letter = alphabet[i];
n = 0;
for (j = 0; j < (int) strlen(word); j++) {
letter = word[j];
if (letter == current_letter) {
n++;
}
}
signature[i] = (char) n;
}
return signature;
}
void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
int j = 0;
int steps = 0;
int occur = 0;
dict *temp = NULL;
if (current_letter == strlen(w)-1) {
printf("Word found : %s!\n",w);
int i = 0;
int different_letters = 0;
for (i = 0; i < 26; i++) {
if ((int) signature[i] > 0) {
different_letters++;
}
}
for (i = 0; i < 26; i++) {
occur = (int) signature[i];
if (occur > 0) {
steps++;
if (steps < different_letters) break;
}
else {
if (temp == NULL) {
temp = d;
}
if (temp->children[occur] == NULL) {
temp->children[occur] = (dict*) malloc(sizeof(dict));
temp = temp->children[occur];
}
}
}
if (temp == NULL) {
temp = d;
}
list *l = NULL;
if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
temp->words[occur] = list_new();
l = temp->words[occur];
}
else {
l = temp->words[occur];
}
char *new;
new = malloc(strlen(w) + 1);
strcpy(new, w);
list_print(l);
/*list_append(l,new);
list_print(l);*/
}
else {
printf("Current letter: %c.\n",w[current_letter]);
dict_insert(d,signature,current_letter+1,w);
}
}
dict * read_words(const char *file) {
FILE* f = NULL;
f = fopen(file, "r");
if (f == NULL) {
printf("Could not open file.\n");
exit(EXIT_FAILURE);
}
char line[256];
dict *d = (dict*) malloc(sizeof(dict));
//bzero((void*)d, sizeof(dict));
while (fgets(line, sizeof(line), f) != NULL) {
if (line[strlen(line) - 1] == '\n') {
line[strlen(line) - 1] = '\0';
}
char *new;
new = malloc(strlen(line) + 1);
strcpy(new, line);
char *signature = compute_signature(new);
dict_insert(d, signature, 0, new);
free((void*)signature);
}
return d;
}
int main(int argc, const char* argv[]) {
/*list *myList = list_new();
list_print(myList);
list_append(myList,"Word1");
list_print(myList);
list_append(myList,"Word2");
list_print(myList);
list_append(myList,"Word3");
list_print(myList);*/
dict *d = read_words("list");
/*list_print(d->words[1]);
list_print(d->children[0]->words[1]);
list_print(d->children[0]->children[0]->words[1]);
list_print(d->words[2]);
list_print(d->children[0]->words[2]);*/
return 0;
}
This code almost works. But not as intended: some words are misplaced in the tree, and sometimes the lists seems to be duplicated (I have not figured how exactly yet). Can you please help me clean this code (maybe pointing out the biggest mistakes^^) ?
Edit: code partially fixed with comments. I have a segfault on list_append()
, at the line if (strcmp(l->first->_word, "") == 0)
.
Edit 2: When I pass a word in my algorithm, I need to test if the list exists or not: e.g. I add "doom" to the list corresponding to "1d 1m 2o", when I pass "mood" I want the algorithm to add it to the same list. Here is how I tried to achieve this:
if (temp == NULL) {
temp = d;
}
list *l = NULL;
if (temp->words[occur] == NULL || temp->words[occur]->first == NULL) {
temp->words[occur] = list_new();
l = temp->words[occur];
}
else {
l = temp->words[occur];
}
I have a segfault for temp->words[occur]->first == NULL
...
Edit 3: my code is compiling but the words are not added to the right list. I think the issue lies in dict_insert()
which declaration is :
void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
int j = 0;
int steps = 0;
int occur = 0;
dict *temp = NULL;
if (current_letter == strlen(w)-1) {
printf("Word found : %s!\n",w);
int i = 0;
int different_letters = 0;
for (i = 0; i < 26; i++) {
if ((int) signature[i] > 0) {
different_letters++;
}
}
for (i = 0; i < 26; i++) {
occur = (int) signature[i];
if (occur > 0) {
steps++;
if (steps == different_letters) break;
}
printf("%d RIGHT\n",occur);
printf("1 DOWN\n");
if (temp == NULL) {
temp = d;
}
temp->children[occur] = (dict*) realloc(temp->children[occur], sizeof(dict));
//temp = temp->children[occur]
}
printf("%d RIGHT\n",occur);
if (temp == NULL) {
temp = d;
}
if (temp->words[occur] == NULL) {
list *l = list_new();
temp->words[occur] = l;
}
char *new;
new = malloc(strlen(w) + 1);
strcpy(new, w);
list_print(temp->words[occur]);
list_append(temp->words[occur],new);
list_print(temp->words[occur]);
}
else {
printf("Current letter: %c.\n",w[current_letter]);
dict_insert(d,signature,current_letter+1,w);
}
}
Edit 4: dict_insert()
completely reworked.
void dict_insert(dict *d, char *signature, unsigned int current_letter, char *w) {
int occur;
occur = (int) signature[current_letter];
if (current_letter == LAST_LETTER) {
if (d->words[occur] == NULL) {
d->words[occur] = list_new();
}
char *new;
new = malloc(strlen(w) + 1);
strcpy(new, w);
printf("word found : %s!\n",w);
list_print(d->words[occur]);
list_append(d->words[occur],new);
list_print(d->words[occur]);
}
else {
if (d->children[occur] == NULL) {
d->children[occur] = malloc(sizeof(dict));
}
d = d->children[occur];
dict_insert(d,signature,current_letter+1,w);
}
}
still a segfault on if (d->children[occur] == NULL)
...