-1

I am coding an algorithm that uses the trie data structure. Basically, if input is "he", it will return ["hello","help","held","hen",other_words_starting_with_he]. code:

//NOTE: The expression sizeof(array)/sizeof(node) must always evaluate to 26. Not more; not less, for all the node arrays.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
char *chars="abcdefghijklmnopqrstuvwxyz";
struct node{
    char ch;
    struct node *next[26];
};
void init_w_null(struct node **n, int len){
    register int counter=0;
    while(counter<len){
       *(n+counter)=NULL;
       counter++;
    }
}
int index_of_char(char ch){
    register int counter=0;
    while(*(chars+counter)!='\0'){
        if(*(chars+counter)==ch){
            return counter;
        }
        counter++;
    }
    return -1;
}
void insert(struct node **root, char *key){
    if(*root==NULL){
        *root=(struct node*)malloc(sizeof(struct node));
        if(*root==NULL){
            perror("[malloc]");
            exit(EXIT_FAILURE);
        }
        init_w_null((**root).next,26);
        struct node *selected=*root;
        (**root).ch=key[0];
        register int counter=1;
        while(counter<strlen(key)){
            int ind=index_of_char(key[counter]);
            (*selected).next[ind]=(struct node *)malloc(sizeof(struct node));
            if(selected==NULL){
                perror("[malloc]");
                exit(EXIT_FAILURE);
            }
            (*(*selected).next[ind]).ch=key[counter];
            selected=(*selected).next[ind];
            counter++;
        }
        return;
    }
    register int counter=1;
    struct node *selected=*root;
    while(counter<=strlen(key)){
        int ind=index_of_char(key[counter]);
        if((*selected).next[ind]!=NULL){
            selected=(*selected).next[ind];
            counter++;
            continue;
        }
        (*selected).next[ind]=(struct node*)malloc(sizeof(struct node));
        if((*selected).next[ind]==NULL){
            perror("[malloc]");
            exit(EXIT_FAILURE);
        }
        (*(*selected).next[ind]).ch=key[counter];
        selected=(*selected).next[ind];
        init_w_null((*selected).next,26);
        counter++;
    }
}
void find(struct node *root, char *key){
    register int counter=1;
    struct node *selected=root;
    int ind=0;
    while(counter<=strlen(key)){
        //if key param ends, and tree doesn't
        if(key[counter]=='\0'){
            printf("List of possible keys:\n");
            construct_str(selected,key);
            return;
        }
        ind=index_of_char(key[counter]);
        //a character of key not found.
        if((*selected).next[ind]==NULL){
            puts("Similar keys not found.");
            return;
        }
        selected=(*selected).next[ind];
        counter++;
    }
    puts("Key found.");
}
void construct_str(struct node *n, char *str){
    puts("[construct_str]");
    //end of recursion
    if(all_children_null(n)&&n!=NULL){
        printf("%s\n",str);
        return;
    }
    register int counter=0;
    while(counter<26){
        if((*n).next[counter]!=NULL){
            char nstr[2];
            nstr[0]=(*(*n).next[counter]).ch;
            nstr[1]='\0';
            str=strcat(str,nstr);
            construct_str((*n).next[counter],str);
        }
        counter++;
    }
}
int all_children_null(struct node *n){
    register int counter=0;
    while(counter<26){
        if((*n).next[counter]!=NULL){
            return 0;
        }
        counter++;
    }
    return 1;
}
void insert_full(struct node **arr, char *key){
    int first=index_of_char(key[0]);
    insert(&arr[first],key);
}
//a debugging function to see whether insertion is successful.
/*void raw_print(struct node *n){
    //puts("[raw_print]");
    if(n!=NULL){
        putchar((*n).ch);
        register int counter=0;
        for(;counter<26;counter++){
            raw_print((*n).next[counter]);
        }
        if(all_children_null(n)){
            printf("\nAll children of %c are NULL.\n",(*n).ch);
        }
    }
}*/
int main(){
    struct node *nds[26];
    init_w_null(nds,26);
    insert_full(nds,"hello");
    insert_full(nds,"help");

    insert_full(nds,"bruh");
    insert_full(nds,"lmao");
    find(nds[index_of_char('l')],"lm");
    return 0;
}

output:

List of possible keys:
[construct_str]
Segmentation fault (core dumped)

I've narrowed the problem down to construct_str. Please tell me if I'm wrong, though. FOR PEOPLE WHO DON'T KNOW WHAT A TRIE IS:

It's a structure which can store strings with the same prefix, so if we add "hello" and "help", the fourth character in both the strings would be siblings. the node of a trie contains a character and a node array with 26 members.

dfmaaa1
  • 67
  • 6
  • Can you provide a [mcve] that shows your problem? Also, read [ask]. Point is, you don't describe an actual problem ("doesn't work" is not a description). – Ulrich Eckhardt Jul 31 '22 at 15:32
  • 1
    Questions seeking debugging help should generally provide a [mre] of the problem, which includes a function `main` and all `#include` directives. You are not showing the initial function call to `construct_str`. When calling that function, did you ensure that the memory buffer pointed to by `str` is large enough so that characters can be added? – Andreas Wenzel Jul 31 '22 at 15:33
  • In particular, we need to see the initial, non-recursive call to `construct_str()`, and its context. At a guess, however, you are causing `strcat()` to overrun the bounds of the left array, and / or causing it to attempt to modify an unmodifiable array. – John Bollinger Jul 31 '22 at 15:51
  • 1
    Also, unrelated to your problem, it would be more conventional to write `n->next` in all the places where you are presently writing `(*n).next`. – John Bollinger Jul 31 '22 at 15:55
  • I feel like `strcat()` is one of those functions where, if you find yourself wanting to use it, you should take a good look at safer alternatives instead before doing so. – Shawn Jul 31 '22 at 15:55
  • @Shawn I was about to code a custom function but then used strcat – dfmaaa1 Jul 31 '22 at 15:57
  • @JohnBollinger added whole code. – dfmaaa1 Jul 31 '22 at 15:57
  • @AndreasWenzel I added the whole code. – dfmaaa1 Jul 31 '22 at 15:57
  • 1
    @dfmaaa1, "minimal reproducible example" is not the same thing as "the whole code". Follow the link in Ulrich's or Andreas's comment if you are unfamiliar with the concept. – John Bollinger Jul 31 '22 at 16:00
  • _Side note:_ You would benefit from learning about the _arrow_ operator `->`. Instead of `(*selected).next[ind]` the preferred syntax is: `selected->next[ind]` – Craig Estey Jul 31 '22 at 17:04
  • Also, doing `while (counter<=strlen(key))` takes O(n^2) time [because `strlen` must rescan the string on each iteration. To get this to linear time O(n), replace with: `while (key[counter] != 0)` – Craig Estey Jul 31 '22 at 17:09
  • @JohnBollinger In order to understand the output, don't you need to understand the whole code? All functions are being used – dfmaaa1 Aug 01 '22 at 15:13
  • @CraigEstey most compilers use caching, but I took your advice and stored the output of strlen in an integer variable – dfmaaa1 Aug 01 '22 at 15:14
  • @dfmaaa1, if you present a *shorter* whole code that produces the same output, for the same reason, then there is less that we have to understand. That is the point of an MRE. The same goes for you, too, by the way. The exercise of preparing a MRE is a powerful debugging technique in its own right. Separating the essential contributors to the problem from the irrelevant chaff helps everybody analyze the problem, including you. – John Bollinger Aug 01 '22 at 17:07
  • @dfmaaa1: The official help page for a [mre] tells you to start from scratch and then to add one thing at a time until you are able to reproduce the problem, and to remove stuff until the problem disappears and then put the last part back in. When posting your question, you stated that everything works until the line `str=strcat(str,nstr);`. Therefore, the first thing to try would have been to create a simple function `main` which contains nothing else except that line as well as a simple definition of those two function arguments. [continued in next comment] – Andreas Wenzel Aug 01 '22 at 20:00
  • 1
    @dfmaaa1: [continued from previous comment] When you run your program in a [debugger](https://stackoverflow.com/q/25385173/12149471), you will notice that when you reach the line `str=strcat(str,nstr);` (which causes the crash), the `str` argument points to the string `"lm"` and the `nstr` argument points to the string `"a"`. Therefore, it would be appropriate to define those two strings the same way as you do in your program. See [this working minimal reproducible example](https://godbolt.org/z/9veGKvzer). As you can see, all you need is a few lines in order to reproduce the problem. – Andreas Wenzel Aug 01 '22 at 20:08
  • 1
    In your posted code, you are calling the functions `construct_str` and `all_children_null` before declaring them. This is probably causing your compiler to use an implicit declaration, which may be wrong and cause trouble. Your compiler should be warning you about this. You should add a proper prototype declaration to your code, before calling these functions. – Andreas Wenzel Aug 01 '22 at 20:24

2 Answers2

4

I see that my crystal ball is well attuned today.

Starting with ...

    find(nds[index_of_char('l')],"lm");

... you are passing a string literal as the second argument to find(). In that function, a pointer to its first character is associated with parameter key.

Within find(), you are forwarding that pointer to construct_str():

            construct_str(selected,key);

, wherein it is associated with parameter str.

In construct_str(), you are passing that pointer as the first argument to strcat():

            str=strcat(str,nstr);

strcat appends the contents of the second string to the array containing the first. It does not create a new array for the concatenated result. Therefore, the left pointer must point (in)to an array that

  1. is modifiable, and
  2. contains enough space after the end of the string to accommodate the extra contents.

String literals do not satisfy either criterion. Oops. Undefined behavior results.

John Bollinger
  • 160,171
  • 8
  • 81
  • 157
2

In the line

str=strcat(str,nstr);

str is a pointer to a string literal. You are not allowed to modify them. Attempting to modify a string literal using the function strcat will invoke undefined behavior.

When calling strcat, you must ensure that the first argument is pointing to a memory buffer which

  • you are allowed to write to, and
  • has sufficient space for adding the character(s) to the string.
Andreas Wenzel
  • 22,760
  • 4
  • 24
  • 39