0

I have a C program to implement of Trie in which each not stores some data (see below). The trie should use strings in it's branches to arrange data (ie as keys) with integer data at each terminating point in the tree.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define TRIE_LIMIT 26
#define TRIE_DATA_TYPE int
#define TRIE_DATA_DEFAULT -1
#define TRIE_NODE_NULL NULL
#define TRIE_START_CHAR 'a' // Defines the 'zero' of the tree, can use (e.g.) '0' for searches including numbers

struct trie_node {
    TRIE_DATA_TYPE data;
    struct trie_node * nodes[TRIE_LIMIT];
};

struct trie_node * make_node_data(TRIE_DATA_TYPE data) {
    struct trie_node* new_node = (struct trie_node*)malloc(sizeof(struct trie_node*));
    if (new_node != NULL) {
        new_node->data = malloc(sizeof(TRIE_DATA_TYPE));
        new_node->data = data;
        for (int i = 0; i < TRIE_LIMIT; i++) {
            new_node->nodes[i] = NULL; // TRIE_NODE_NULL;
        }
    }
    return new_node;
}

struct trie_node * make_node_default() {
    return make_node_data(TRIE_DATA_DEFAULT);
}

void insert_trie(struct trie_node * root, char * val, TRIE_DATA_TYPE data) {
    // Based off of my solution to HackerRank "1 Week Preparation kit": "No Prefix Set"
    // Modified from:
    // https://www.digitalocean.com/community/tutorials/trie-data-structure-in-c-plus-plus
    // https://www.techiedelight.com/trie-implementation-insert-search-delete/
    struct trie_node * current = root;
    int length = strlen(val);

    for (int i=0; i < length; i++) { // Can replace with search for '\0'
        int index = val[i] - TRIE_START_CHAR;
        if (current->nodes[index] == NULL) {
            // Node does not yet exist in tree, append
            //printf("Making node for letter %s\n", val[i]);
            current->nodes[index] = make_node_default();
        }
        else {
            // Can do something else
        }
        current = current->nodes[index];
    }
    // Reached the final branch, can add our data here.
    if (current->data == TRIE_DATA_DEFAULT) {
        current->data = data;
    }
    else {
        // Handle case were data already exists at node;
    }
    current = root;
    return;
}

TRIE_DATA_TYPE find_data(struct trie_node* root, char* val) {
    // Searches a tree for val.
    // returns TRIE_DATA_TYPE if it doesn't exist
    // returns current->node
    // Modified from:
    // https://www.digitalocean.com/community/tutorials/trie-data-structure-in-c-plus-plus
    // https://www.techiedelight.com/trie-implementation-insert-search-delete/

    struct trie_node* current = root;
    int length = strlen(val);
    for (int i = 0; i < length; i++) { // Can replace with search for '\0'
        int index = val[i] - TRIE_START_CHAR;
        if (current->nodes[index] == NULL) {
            // Node does not yet exist in tree, return TRIE_DATA_DEFAULT
            //printf("Can't find val in root, i=%d returning...\n", i);
            current = root;
            return TRIE_DATA_DEFAULT;
        }
        else {
            // Node exists in tree, continue
            current = current->nodes[index];
        }
    }
    // Got to the end of the tree, returning value stored
    // Can check if there are daughter nodes, etc
    TRIE_DATA_TYPE data = current->data;
    current = root;
    return data;
}

void print_find(struct trie_node* root, char  * val) {
    TRIE_DATA_TYPE trie_val = find_data(root, val);
    if (trie_val == TRIE_DATA_DEFAULT) {
        printf("'%s' not in root!\n", val);
    }
    else {
        printf("Data at '%s': %d\n", val, trie_val);
    }
    return;
}


int main() {
    struct trie_node* root = make_node_default();
    insert_trie(root, "abc", 5);
    insert_trie(root, "abcd", 6);
    print_find(root, "abc"); // Should print: Data at abc: 5
    print_find(root, "abcd");// Should print: Data at abcd: 6
    print_find(root, "trie");// Should print: trie not in root!

    free(root);

    return 0;
}

Using the debugger and examining the values in the 'root' trie at runtime, what we see is that the tree appears to initialize correctly (data is -1, and all values in the nodes are NULL). But eventually (sometimes the first "insert" step, sometimes the second, sometimes during the "print_find"), the values take on some "nonsense" values for some of the values that should remain NULL (example below; [0] is correct, the rest should be NULL; taken on a breakpoint located on the first "print_find" step).

Null-nonsense

*Note that this is an example, and each time I run the application, I get slightly different results, sometimes I make it all the way through, but usually I hit some nonsense value of data and it claims that the value at 'trie' is -7600000 or something.

I've tried cleaning and rebuilding, as well as different ways of allocating (malloc, vs calloc, vs nothing).

What I was expecting is that the values would remain NULL during runtime and I could use checks for NULLs to progress through the trie; instead I often end up with garbage as it finds non-NULL values and returns those values of data. This looks like the memory is allocated correctly, but then reused by other processes or something during runtime, but I'm not sure how to prevent such behaviour.

Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
Dace
  • 9
  • 1
  • 1
  • 3
    `new_node->data = malloc(sizeof(TRIE_DATA_TYPE));` won't work. `new_node->data` is an `int` not an `int*`. Why dynamically allocate space for _one_ `int` anyway? And `struct trie_node* new_node = (struct trie_node*)malloc(sizeof(struct trie_node*));` only allocates space for a `trie_node` pointer, not a `trie_node`. – Ted Lyngmo Apr 08 '23 at 20:17
  • 2
    Turn up the warning level in your compiler and pay attention to them. Use tools like AddressSanitizer to help you find errors. https://godbolt.org/z/oxxYbqhG5 There are multiple allocation errors in `make_node_data` leading to a buffer overflow. – Retired Ninja Apr 08 '23 at 20:20
  • For some working trie solutions, see my answers: [I am trying to initialize a glogal structure(trie_node) as the head of a trie in my main function and am getting memory allocation problems](https://stackoverflow.com/a/62236036/5382650) and [Implement N-ary Tree in c](https://stackoverflow.com/a/62159300/5382650) – Craig Estey Apr 08 '23 at 20:22

1 Answers1

2

At least the function make_node_data is invalid,

For starters you need to allocate memory for a node instead of for a pointer to a node

struct trie_node* new_node = (struct trie_node*)malloc(sizeof(struct trie_node*));
                                                       ^^^^^^^^^^^^^^^^^^^^^^^^

That is you need to use expression sizeof( struct trie_node ) instead of sizeof( struct trie_node * ).

The data member data has the type int

#define TRIE_DATA_TYPE int
//...
struct trie_node {
    TRIE_DATA_TYPE data;
    struct trie_node * nodes[TRIE_LIMIT];
};

So thus memory allocation

    new_node->data = malloc(sizeof(TRIE_DATA_TYPE));

does not make sense and produces a memory leak:

new_node->data = data;

Within the function insert_trie the call of strlen

int length = strlen(val);

is redundant. The following for loop could look for example like

for ( ; *val != '\0'; val++ )

Also these statements within the body of the for loop

int index = val[i] - TRIE_START_CHAR;
if (current->nodes[index] == NULL) {

are unsafe and can result in undefined behavior.

And the function should be declared like

void insert_trie(struct trie_node * root, const char * val, TRIE_DATA_TYPE data);

Also this statement

current = root;

that is present in your functions usually before return statements has no effect.

Vlad from Moscow
  • 301,070
  • 26
  • 186
  • 335
  • First of all, thank you; changing """(struct trie_node*)malloc(sizeof(struct trie_node*))""" to """(struct trie_node*)malloc(sizeof(struct trie_node))""" seems to solve the question I was asking. – Dace Apr 08 '23 at 20:45
  • Second, I'm not sure I fully understand how to replace the for loop in your suggestion. I only want to add nodes when there is a NULL value where my next letter from val ought to be; how do I check for that "safely"? – Dace Apr 08 '23 at 20:51
  • @Dace A string can contain different symbols as for example spaces. In this case your code will result in undefined behavior. You need to check that the current symbol is indeed a low-case letter or if it is an uooer-case letter then you should to convert it to a low-case letter or ignore it.. – Vlad from Moscow Apr 08 '23 at 20:56
  • I don't think I understand, in the "insert_trie" function, how would I tell if it is safe to add a node unless I check if the value at the proposed location isn't already NULL? – Dace Apr 08 '23 at 21:18
  • @Dace I mean you need to check that the string indeed contains letters. What will be result of the function if it will be called with string "123"? – – Vlad from Moscow Apr 08 '23 at 21:29
  • Oh, I understand that; for my purposes I'm assuming that the user is only using [a-z] for this toy example. Is the second part ("if (current->nodes[index] == NULL)") safe, assuming that index is not something silly? – Dace Apr 08 '23 at 21:30
  • 1
    @Dace It is safe provided that the processed character is a low case letter. – Vlad from Moscow Apr 08 '23 at 21:40