0

I'm making a regex parser. The instruction "t->left = tmp;" the instruction "t->left = tmp;" creates a segmentation fault, but not always ! Try executing the code several times, you'll see that it doesn't always happen. The segmentation fault occurs when printing the tree, because one of the nodes has a child with address "0x62" or "0x54" or something similar. It's really weird because when "tmp" is created I check that both children are NULL but somehow one of them gets modified to "0x.." while executing.

Thanks to anyone that could help me solve this !

I spent quite a while trying to figure out why just adding a "left children" while parsing the tree will create a segmentation fault. I really don't understand because it's a simple pointer creation and assignment ! Comment the instruction "t->left = tmp;" and the segmentation fault is gone ! The weirdest issue so far..

Best regards !

/*
 *  Includes
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 *  Typedefs
 */

struct node {
    char val;

    struct node* left;
    struct node* right;
};

typedef struct node NODE;
typedef struct node * TREE;

/*
 *  Tree handling 
 *  - create
 *  - free
 *  - print
 */

TREE createNode(char val) {
    TREE t = (TREE) malloc(sizeof(TREE));
    t->val = val;
    t->left = NULL;
    t->right = NULL;

    return t;
}

void freeTree(TREE t) {
    if (t != NULL) {
        if (t->left != NULL)
            freeTree(t->left);
        if (t->right != NULL)
            freeTree(t->right);
        free(t);
    }
}

void printNode(TREE t) {
    printf("------ NODE ------\n");
    printf("t == %p\n", t);
    if (t != NULL) {
        printf("val = %c\n", t->val);
        printf("left : %p\n", t->left);
        printf("right : %p\n", t->right);
    }
}

void printTree(TREE t) {
    if (t != NULL) {
        printf("%c\t[%p]\n", t->val, t);
        printf("%p\n", t->left);
        if (t->left != NULL) {
            printf("----------------begin left----------------\n");
            printTree(t->left);
            printf("----------------end left----------------\n");
        }
        printf("%p\n", t->right);
        if (t->right != NULL) {
            printTree(t->right);
        }
    }
}

/*
 *  Misc functions
 */

char* concat(char *s1, char *s2)
{
    char *result = malloc(strlen(s1)+strlen(s2)+1);//+1 for the zero-terminator
    //in real code you would check for errors in malloc here
    strcpy(result, s1);
    strcat(result, s2);
    return result;
}

char* substr(char* s, int start) {
    int i = 0;
    char* sub = malloc(4*(strlen(s) - start) + 1); // +1 for the zero-terminator
    while (s[i+start] != '\0') {
        sub[i] = s[i+start];
        i++;
    }
    return sub;
}

char* substr_(char* s, int start, int end) {
    int i = 0;
    char* sub = malloc(4*(strlen(s) - start) + 1); // +1 for the zero-terminator
    while (s[i+start] != '\0' && i < (end-start+1)) {
        sub[i] = s[i+start];
        i++;
    }
    return sub;
}

/*
 *  Regex handling
 */

TREE parseParenthesis(char* regex) {

    if (regex[0] == '\0') return NULL;

    printf("%s\n",regex);

    TREE tree = createNode(regex[0]);

    int i = 0;
    int start = 1;
    int lastParenthesisPos = 0;;

    switch (regex[0]) {
        case '(' :
            while (regex[i] != '\0') {
                if (regex[i] == ')')
                    lastParenthesisPos = i;
                i++;
            }
            tree->left = parseParenthesis(substr_(regex, 1, lastParenthesisPos-1));
            start = lastParenthesisPos + 1;
        break;
        case '|' :
        case ')' : // Handled by case ')'
        case '*' :
        case '+' :
        case '?' :
        default : break;
    }

    tree->right = parseParenthesis(substr(regex, start));

    return tree;

}

void parseExtras(TREE t, TREE parent) {

    if (t == NULL) return;

    TREE tmp = NULL;

    switch (t->val) {
        case '*' :
        case '+' :
        case '?' :
            parseExtras(t->right, t);
            tmp = createNode(parent->val);
            t->left = tmp;
        break;
        case '(' :
        case ')' :
        case '|' :
        default :
            parseExtras(t->left, t);
            parseExtras(t->right, t);
        break;
    }

}

/*
 *  Main
 */

int main() {

    //char* regex = "a|(b|c*)?d|ef";
    char* regex = "ab*dd*";

    // Parse ()
    TREE t = parseParenthesis(regex);
    printf("************************************ OK - Parse parenthesis\n");
    // Parse * + ?
    parseExtras(t, NULL);
    printf("************************************ OK - Parse extras\n");

    printTree(t);
    printf("************************************ OK - Print tree\n");

    freeTree(t);
    printf("************************************ OK - Free tree\n");

    return 0;

}
user2154283
  • 291
  • 1
  • 5
  • 14
  • 2
    Side Note: For all that is good in this world (and that ain't much nowadays) *please* don't hide pointer-types in typedefs like `TREE` unless you're purposely abstracting a handle-type for an API-like library or specifying an actual useful entity such as a function pointer type. Whether you realize it or not the *lack* of asterisks actually makes the code *harder* to read and decipher, and more prone to errors (like the one you have in your code). (and [don't cast malloc in C programs](http://stackoverflow.com/questions/605845/do-i-cast-the-result-of-malloc)). – WhozCraig Mar 17 '14 at 20:10
  • In function `parseExtras`, what's the point in the three cases that come right before the `default`??? And in function `parseParenthesis`, what's the point in the five cases that come right before the default??? – barak manos Mar 17 '14 at 20:11
  • Hi, the presence of the three cases is just for myself, but I'll comment it. Even when not casting the pointer the set-fault still occurs, but this time it occurs every time I execute the code. – user2154283 Mar 17 '14 at 20:21
  • I find the style of `type *var = malloc(sizeof *var);` less error prone and easier to maintain. Example: `TREE t = malloc(sizeof *t);` – chux - Reinstate Monica Mar 17 '14 at 20:46

1 Answers1

2

You are allocating space only for a pointer, not the actual struct:

    TREE t = (TREE) malloc(sizeof(TREE));

After this, you are modifying memory that is beyond the size of TREE (which is a pointer).

As a comment above said, don't use typedef unnecessarily.

ldx
  • 3,984
  • 23
  • 28
  • Hi :) The seg-fault occurs even if I remove the cast, but this time it happens every time. I used the typedef because it was easier to read the code like that (it's just a personal opinion). – user2154283 Mar 17 '14 at 20:24
  • 2
    @user2154283 You need to look at that again. This answer is saying the allocation *size* is wrong. (see the first sentence). It could/should be `malloc(sizeof(*t))`, which, with your typedef'd pointer type, is entirely **non**-intuitive. – WhozCraig Mar 17 '14 at 20:26
  • I get it now :) I changed it to malloc(NODE) and now it works. thanks so much for your help ! – user2154283 Mar 17 '14 at 20:34