0

Following is my code in C for the implementation of B-Tree. The code works very strange sometimes as it gives 'Segmentation Fault' sometimes and if I run it again and provide the same input values, it works just fine.

It also happens that, I entered 1 value (e.g, 500) now this should be the root value, but the code also prints two empty (nil) child values.

And all this behavior is erratic, it does not happen every time. It almost never happens second time. I am sure, this has something to do with memory. Can someone suggest me a cure.

Thanks in advance !!

#include <stdio.h>
#include <stdlib.h>

#define ORDER 4

struct node{
    int n;
    int keys[ORDER-1];
    struct node *children[ORDER];
};

struct toReturn{
    int result;
    struct node* nodeReturn;
};

struct node* splitNodeAndAdd(struct node* , int );
struct  node* insertInTree(struct node* , int );
struct toReturn* insertRecursive(struct node *, int );
struct node* splitNodeAndAddNode(struct node* , struct node* );


struct node* addElement(struct node *, int);
void printTree(struct node*, int);
void addAndSort(int *, int, int);
int checkLeaf(struct node* node);




int main(){

    int inputElement = 0;
    printf("Enter the element you want to add. Press 0 to quit\n");
    scanf("%d",&inputElement);


    struct node * root;
    root = (struct node*) malloc(sizeof(struct node));

    while(inputElement != 0){
        root = addElement(root,inputElement);
        printTree(root,0);
        scanf("%d",&inputElement);
    }

    return 0;
}

struct node* addElement(struct node * rootNode, int inputElement){

    if(rootNode->n == 0){
        rootNode->keys[0] = inputElement;
        rootNode->n += 1;
        return rootNode;
    }
    else{
        rootNode = insertInTree(rootNode,inputElement);
        return rootNode;
    }
}


struct  node* insertInTree(struct node* node, int inputElement){

    if(checkLeaf(node) == 0){           //It is leaf node.
        if(node->n == ORDER - 1){
            struct node * temp = node;
            struct node *newRoot = (struct node*) malloc(sizeof(struct node));
            node = newRoot;
            newRoot->children[0] = temp;
            newRoot = splitNodeAndAdd(newRoot,inputElement);
            return newRoot;
        } else{
            addAndSort(node->keys,node->n,inputElement);
            node->n += 1;
        }
    } else{
        //recursive add . DO IT.
        struct toReturn * returnedValue = insertRecursive(node,inputElement);
        node = returnedValue->nodeReturn;
    }
    return node;
}


//Change split node and add before running. Won't work otherwise.
struct toReturn* insertRecursive(struct node *node, int inputElement){

    if(checkLeaf(node) == 0){
        if(node->n == ORDER - 1){
            struct node * temp = node;
            struct node *newRoot = (struct node*) malloc(sizeof(struct node));
            node = newRoot;
            newRoot->children[0] = temp;
            newRoot = splitNodeAndAdd(newRoot,inputElement);
            struct toReturn *value = (struct toReturn*) malloc(sizeof(struct toReturn));
            value->result = 0;
            value->nodeReturn = newRoot;
            return value; // Also send parameter to show its done.


        } else{
            addAndSort(node->keys,node->n,inputElement);
            node->n += 1;
            struct toReturn *value = (struct toReturn*) malloc(sizeof(struct toReturn));
            value->result = 1;
            value->nodeReturn = node;
            return value; // Also send parameter to show its done.
        }
    }
    else{
        int i = node->n;
        i -= 1;

        while( i >= 0 && inputElement < node->keys[i]){
            i -= 1;
        }
        i += 1;
        //Go to child node of this using 'i'
        struct toReturn* returnedValue = insertRecursive(node->children[i],inputElement);

        if(returnedValue->result == 0){
            //Now we have a node in returnedValue to be added to current node.

            //Check if current root is also going to be full.
            if(node->n == ORDER - 1){
                struct node* currentNode = node;
                struct node* nodeToAdd = returnedValue->nodeReturn;
                struct node* newRoot = (struct node*) malloc(sizeof(struct node));
                newRoot->children[0] = currentNode;
                newRoot = splitNodeAndAddNode(newRoot,nodeToAdd);


                struct toReturn* temp = (struct toReturn*) malloc(sizeof(struct toReturn));
                temp->result = 0;
                temp->nodeReturn = newRoot;// whatever is returned from splitNodeAndAddNode.
                return temp;
            } else{

                int k = i;
                for(k = node->n; k>i; k--){
                    node->keys[k] = node->keys[k-1];
                }
                for(k = node->n + 1; k > i; k--){
                    node->children[k] = node->children[k-1];
                }

                node->keys[i] = returnedValue->nodeReturn->keys[0];
                node->n += 1;
                node->children[i] = returnedValue->nodeReturn->children[0];
                node->children[i+1] = returnedValue->nodeReturn->children[1];
                returnedValue->result = 1;
                returnedValue->nodeReturn = node;
            }
        }else{
            node->children[i] = returnedValue->nodeReturn;
            returnedValue->nodeReturn = node;
        }
        return returnedValue;
    }
}

struct node* splitNodeAndAddNode(struct node* node, struct node* toAdd){

    struct node* leftChild = node->children[0];
    struct node* allChildren[ORDER+1];

    int i = 0;
    for(i = 0; i<ORDER; i++){
        allChildren[i] = leftChild->children[i];
    }

    int childrenCount = 0;

    int j = 0;
    struct node* rightChild = (struct node*) malloc(sizeof(struct node));
    int array[ORDER];

    for(i=0; i<ORDER - 1; i++){
        array[i] = leftChild->keys[i];
    }
    addAndSort(array,ORDER-1,toAdd->keys[0]);
    int addedPosition = 0;
    for(i=0; i<ORDER; i++){
        if(array[i] == toAdd->keys[0]){
            addedPosition = i;
        }
    }

    for(j=ORDER-1; j>= addedPosition; j--){
        allChildren[j+1] = allChildren[j];
    }
    allChildren[addedPosition] = toAdd->children[0];
    allChildren[addedPosition+1] = toAdd->children[1];


    int median = ORDER / 2;
    node->keys[0] = array[median];
    node->n += 1;

    //add left and right child of node.
    for(i = 0; i<median; i++){
        leftChild->keys[i] = array[i];
        leftChild->children[i] = allChildren[childrenCount];
        childrenCount++;
    }
    leftChild->children[i] = allChildren[childrenCount];
    childrenCount++;
    leftChild->n = median;

    for(i = median; i<ORDER-1; i++){
        leftChild->keys[i] = 0;
    }

    int k = 0;

    for(i = median+1; i<ORDER; i++){
        rightChild->keys[k] = array[i];
        rightChild->n += 1;
        rightChild->children[k] = allChildren[childrenCount];
        childrenCount++;
        k++;
    }

    rightChild->children[k] = allChildren[childrenCount];
    childrenCount++;

    node->children[0] = leftChild;
    node->children[1] = rightChild;
    return node;
}


struct node* splitNodeAndAdd(struct node* rootNode, int inputElement){

    struct node* leftChild = rootNode->children[0];
    struct node* rightChild = (struct node*) malloc(sizeof(struct node));

    int i = 0;
    int j = 0;

    int array[ORDER];
    for(i =0; i<ORDER-1;i++){
        array[i] = leftChild->keys[i];
    }
    addAndSort(array,ORDER-1,inputElement);

    int medianIndex = 0;
    for(i = 0; i<ORDER; i++){
        if(inputElement == array[i]){
            medianIndex = i;
            break;
        }
    }
    int median = ORDER / 2;

    for(j=0; j<median; j++){
        leftChild->keys[j] = array[j];
    }

    for(j=median; j<ORDER-1;j++){
        leftChild->keys[j] = 0;
    }

    leftChild->n = median;
    rootNode->keys[0] = array[median];
    rootNode->n += 1;

    int k = 0;
    for(j= median+1; j<ORDER; j++){
        rightChild->keys[k] = array[j];
        rightChild->n += 1;
        k++;
    }
    rootNode->children[0] = leftChild;
    rootNode->children[1] = rightChild;

    //Have to add all children if this is not leaf node.
    //Have to change rootNode[0] to long term picture.

    return rootNode;

}




void printTree(struct node *a, int level){

    printf("%d : ",level);
    for(int i=0; i<a->n; i++){
        printf("%d ",a->keys[i]);
    }
    printf("\n");
    if(checkLeaf(a) == 1){
        for(int i=0; i <= a->n ; i++){
            printTree(a->children[i],level+1);
        }
    }else {
        return;
    }

}


int checkLeaf(struct node* node){
    int i = 0;
    if(node->children[i] != NULL){
        return 1;
    }
    return 0;
}


void addAndSort(int *array, int elementCount, int inputElement){
    int i = 0;

    for(i = elementCount-1; i >=0 && array[i] > inputElement; i--){       //else find the best
        array[i+1] = array[i];
    }
    array[i+1] = inputElement;
}
Gauraang Khurana
  • 707
  • 1
  • 8
  • 18
  • The segfault should always happens around the same use of variable. If under linux, try running the program under gdb and, when it forcefully stops because of seg fault, type the "where" function to see at which line all's happening. This will help us help you, or you might even see the issue just from that. (If using windows, you can still use gdb but msvc++ debugger should already tell you where it happens, just tell us. – Alceste_ Apr 22 '18 at 04:43
  • 2
    *The code works very strange sometimes as it gives 'Segmentation Fault' sometimes and if I run it again and provide the same input values, it works just fine* this is classic undefined behaviour. Do you really expect us to read your wall of code to find all the places where you might have undefined behaviour? Please provide a [mcve]. One hint though: you only allocated memory for the root node in `main`, you didn't initialize it, so the whole `addElement` runs on uninitialized values, hence the whole program will show the undefined behaviour. Think about that. This applies for all `malloc` – Pablo Apr 22 '18 at 04:49
  • Hey @Pablo, thank you for the suggestion. Yes, you're right, I cannot expect someone to go through the whole code. My bad. Can you elaborate a bit on "allocated memory for the root node in main, you didn't initialize it", I am not sure if I follow. I think I reserved memory, then why is it causing such erratic behavior. I presume it takes garbage value sometimes, am I right ? – Gauraang Khurana Apr 22 '18 at 14:12
  • I've written an answer elaborating on my previous comment. This was too large for a comment, so I wrote an answer instead. – Pablo Apr 23 '18 at 01:22
  • A good way to get more-consistent behavior in C and C++ is to always zero out the fields of a structure that you don’t set explicitly. C99-style initialization does this, and so does `calloc()`, but `malloc()` does not. You can do this manually with `memset()`. Also be on the lookout for uninitialized variables and out-of-bounds array indices. – Davislor Apr 23 '18 at 03:14

1 Answers1

2

As I said in the comments, this is to much code to review, specially when you don't have given us enough hints where to begin to look at the problem.

Can you elaborate a bit on allocated memory for the root node in main, you didn't initialize it, I am not sure if I follow. I think I reserved memory, then why is it causing such erratic behaviour. I presume it takes garbage value sometimes, am I right ?

I briefly looked over the code and I noticed the same pattern. You just allocate memory, but you don't initialize the memory before reading the contents of the allocated memory. This yields undefined behaviour and what you are observing, sometimes working and sometimes not, it's a classical sign of undefined behaviour. Because of the nature of undefined behaviour, it is very hard and sometimes nearly impossible to predict what will happen. In most cases all you need to do is to find the source of the undefined behaviour.

In your case the first thing I did was look at your malloc calls and see where you initailize the memory. You failed to do that, so I stopped looking for more errors, because this is most likely the source of all your problems.

When you allocate memory with malloc, you only get a chunk of memery from the operating system, there is no guarantee that the memory is initialized in any way, that means that the memory has randon values. You do in main

root = (struct node*) malloc(sizeof(struct node));
while(inputElement != 0) {
    root = addElement(root,inputElement);
    ...
}

and then addElement does:

struct node* addElement(struct node * rootNode, int inputElement){

    if(rootNode->n == 0){
        rootNode->keys[0] = inputElement;
        ...
    } else {
        rootNode = insertInTree(rootNode,inputElement);
        ...
    }
}

As you see, you've allocated memory for the root node, but root->n is not initialized, it's value is random, it might be 0, but it also might be 24 or -12389. So your code is already doing something unpredictable, because by just looking at the code you cannot know whether rootNode->keys[0] = inputElement; is executed, or rootNode = insertInTree(rootNode,inputElement);. That's the nature of undefined behaviour. In the lucky case that rootNode->n is 0, the function might work correctly.

You have to do

root = malloc(sizeof *root);

if(root == NULL)
{
    fprintf(stderr, "No memory left\n");
    return 1;
}

// initializing the memory
root->n = 0;
memset(root->keys, 0, sizeof root->keys / sizeof root->keys[0]);
for(size_t i = 0; i < sizeof root->children / sizeof root->children[0]; ++i)
    root->children[i] = NULL;

while(inputElement != 0) {
    root = addElement(root,inputElement);
    ...
}
  1. Don't cast malloc
  2. Check always, and I really mean always the return value of malloc/realloc
  3. In case you've missed, check always, and I really mean always the return case of malloc/realloc
  4. Avoid using sizeof(struct node), use sizeof *root instead, makes the code more robust.
  5. Don't forget to free() the memory.
  6. In case you've missed, don't forget to free() the memory

I'd also suggest that you create a function that returns you a new allocated + initialized node, otherwise you'll repeat the same code again and again. And this applies to all your malloc calls.

Of course in your case the initialization can be avoided by using calloc instead of malloc. calloc works like malloc but it also sets the allocated memory to 0. This is a great feature because if your memory has to be initialized with 0 and NULL pointers1, this saves a lot of time and you could do

root = calloc(1, sizeof *root);

if(root == NULL)
{
    fprintf(stderr, "No memory left\n");
    return 1;
}

// initializing the memory with 0 is not needed
// anymore as calloc takes care of that

while(inputElement != 0) {
    root = addElement(root,inputElement);
    ...
}

Make this changes throughout your code and this will eliminate many of the sources for undefined behaviour. That doesn't mean that all your problem are solved though.


Footnotes

1Legends say that there are architectures out there where NULL is not interpreted as 0, so using calloc for initializing NULL pointers might fail. But I dare anyone to find any commercially successful architecture that people use productively these days where this is the case.

Pablo
  • 13,271
  • 4
  • 39
  • 59
  • Thanks a lot for all the information and for your clear and elaborate answer. It cleared all my doubts about memory. Whatever I was taught in my class makes sense to me after reading this post by you. And yes you were right, we were recommended to use 'calloc' and now I have used that in my code. Lastly, my only question would be, I am slightly puzzled as I didn't get any undefined behavior from this code when I ran it on some online websites, like - https://www.onlinegdb.com/. Whereas I got that erratic behavior when I ran it on my system. Any specific reason ? – Gauraang Khurana Apr 24 '18 at 02:36
  • @GauraangKhurana that's the nature of undefined behaviour, it's undefined, it hard to make a prediction. With some compilers, when the moon is at the correct place and Jupiter a Mars align, your code might work, with other compilers it won't. You are observing classical undefined behaviour. – Pablo Apr 24 '18 at 08:50