0

So I'm writing a code to implement a data structure somewhat similar to a partial trie. Here's the code

#include<bits/stdc++.h>
#include"utils.h"

using namespace std;

template<std::size_t N>
struct node_x_t {
    static_assert(N <= 256, "N must be <= 256");
    int num_elements;
    char chars[N];
    node_x_t<256>* children[N];
};

node_x_t<256>* root;

node_x_t<256>* createNode(char c){
    node_x_t<1>* temp = new node_x_t<1>;
    temp->num_elements = 1;
    temp->chars[0] = c;
    temp->children[0] = NULL;
    return (node_x_t<256>*)temp;
}

void add(string key){
    node_x_t<256>* nav = root;
    node_x_t<256>* prev = root;
    int prevIdx=0;
    int size = key.size();
    for(int i=0;i<size;i++){
        int idx = -1;
        if(nav==NULL){
            nav = createNode(key[i]);
            if(i!=0){
                prev->children[prevIdx] = nav;
                prev = nav;
                prevIdx=0;
            }
            else{
                prev = nav;
                root = nav;
                prevIdx=0;
            }
            cout<<"NEW: "<<nav->chars[0]<<" ";
            nav = nav->children[0];
            continue;
        }

        int nodeSize = nav->num_elements;
        for(int j=0;j<nodeSize;j++){
            if(nav->chars[j] == key[i]){
                idx = j;
                break;
            }
        }

           
        if(idx!=-1){
            prev = nav;
            prevIdx = idx;
            nav = nav->children[idx];
            cout<<"HIT: "<<prev->chars[idx]<<" ";
        }
    }
    cout<<"\n";
}


void test(){
    root=createNode('0');
    add("a");
    add("ab");
    add("abc");
    add("abcd");
    add("abcde");
    add("abcdef");
    add("abcdefg");
    add("abcdefgh");
    add("abcdefghi");
    add("abcdefghij");
    add("abcdefghijk");
}

int main(){
    test();
}

The chars[] array stores the character next to the current node and the children[] array has a pointer to the node containing the next character

I have added "NEW" and "HIT" statements to indicate whether the prefix of the current string to be added has already been added earlier(testing purposes).

Here's the output

NEW: a 
HIT: a NEW: b 
HIT: a HIT: b NEW: c 
HIT: a HIT: b HIT: c NEW: d 
HIT: a HIT: b HIT: c HIT: d NEW: e 
HIT: a HIT: b HIT: c HIT: d HIT: e NEW: f 
HIT: a HIT: b HIT: c HIT: d HIT: e HIT: f NEW: g 
HIT: a HIT: b HIT: c HIT: d HIT: e HIT: f HIT: g NEW: h 
HIT: a HIT: b HIT: c HIT: d HIT: e HIT: f HIT: g HIT: h NEW: i 
HIT: a HIT: b HIT: c HIT: d HIT: e HIT: f HIT: g HIT: h HIT: i NEW: j 
HIT: a HIT: b NEW: c NEW: d NEW: e NEW: f NEW: g NEW: h NEW: i NEW: j NEW: k 

Basically after the 10th string is added, it seems like most of the nodes are deleted automatically and are created again. However, ideally the code must have given a HIT for the string "abcdefghijk" as well.Could you find the bug in the code?

1 Answers1

2

Could you find the bug in the code?

The bug in your code is the assumption that template types can be cast to each other with modified template parameters. That assumption is just plainly wrong. node_x_t<1> is a different type then node_x_t<256> and accessing one object using a pointer to the other type is just as wrong as accessing a double via an int pointer. Overall, if you cast pointers or use reinterpret_cast it could be that you should rethink your approach.

In this case, node_x_t could dynamically allocate the number of elements it stores, if the intention is that the number of elements changes on runtime. For example, something along:

struct node_x_t {
    node_x_t(std::size_t N) : num_elements(N), chars(N), children(N)
    {
        assert(N <= 256); // TODO: throw exception
    }
    node_x_t() : node_x_t(0) {}
    node_x_t(char c) : node_x_t(1) {
         chars.at(0) = c;
    }
    int num_elements;
    // at least use std::array
    std::vector<char> chars;
    std::vector<std::shared_ptr<node_x_t>> children;
};

Or you could stick with a constant compile-time limit to number of elements, then you need to use only that single one type. Then your functions should also always use that template type. Really, these functions look like should be member functions.

template<std::size_t N>
struct node_x_t {
    static_assert(N <= 256, "N must be <= 256");
    int num_elements;
    std::array<char, N> chars;
    std::array<std::shared_ptr<node_x_t>, N> children;

    static std::shared_ptr<node_x_t<N>> createNode(char c);
    void add(std::string key) {
       // maybe shared_ptr is too much - no idea, I do not understand the algorithm
       node_x_t<N>* nav = *this;
       node_x_t<N>* nav = *this; // use N everywhere!
       // blabla
    }
};
KamilCuk
  • 120,984
  • 8
  • 59
  • 111