4

This is BST NODE

struct BST_node{
    char *name1;
    char *data1;
    struct BST_node* left;
    struct BST_node* right;
};   

struct BST_node* Insert(struct BST_node *rootptr, datatype_t *d){
if(rootptr == NULL){
    char name[66];
    char data[1466];
    rootptr = (struct BST_node*)malloc(sizeof(struct BST_node));
    rootptr->name1 = d->name;
    rootptr->data1 = d->data;
    printf("%s 1\n", rootptr->name1);
    rootptr->left = rootptr->right = NULL; 
}

if(strcmp(rootptr->name1, d->name) < 0){
    printf("%s left", rootptr->name1);
    rootptr->left = Insert(rootptr -> left, d);
}
else if(strcmp(rootptr->name1, d->name) > 0){
    printf("right\n");
    rootptr->right = Insert(rootptr -> right, d);
}
else if(strcmp(rootptr->name1, d->name)==0){
    printf("duplicate\n");
}
return rootptr;
}

So I am scanning in a CSV file using file ptrs and reading data in from datatype_d. Example this is what the data looks like in the CSV file. "Name, Data."

This is how I am reading in the csv file and calling Insert function from my main function.

int main(int argc, char** argv)
{
char* eq_csv_file = NULL;
char* eq_csv_file1 = NULL;
char* read_mode="r";
char* write_mode="w+";
struct BST_node* root = NULL;
int index = 0;


if(argv[1]!=NULL && argv[2] != NULL )
{
    eq_csv_file=argv[1];
    eq_csv_file1 = argv[2];
}
else
{
    return EXIT_FAILURE;
}

FILE* Data_input = safe_open_file(eq_csv_file,read_mode);
FILE* Data_output = safe_open_file(eq_csv_file1, write_mode);

datatype_t **earth = (datatype_t**)malloc(MAX_NUM_LINE*sizeof(datatype_t *));
datatype_t *earth_one = read(Data_input);


while((earth_one) != NULL)
{

    earth[index] = earth_one;


    if(root != NULL){
    printf("%s\n", (root->name1));

    }
    root = Insert(root, earth[index]);

    index++;
    earth_one= read(Data_input);
}

Now when I am running through this code to check if its working, its printing out Duplicate for all the data from the csv file except for the first data. I simply dont know where exactly is that I am making a mistake and changing the rootptr and making it equal to datatype_d that its printing duplicate when it checks the strcmp.

So an example would be that I read this from a CSV file.

"Dave", "Studying at UCLA"
"John", "Works at Google"
"Mike", "School teacher"

So I am suppose to insert these into the BST but for some reason, its not keeping track of the head node "Dave". It never goes to the left or right subtree but rather it says that they are duplicates when I use strcmp.

Here is the output:
Dave 1
John
duplicate
Segmentation fault: 11

This is my read code which I call from the main function to read the csv file.

read(FILE* fp)
{
char name[65];
char data[1465];
if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
    datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
    d->name = name;
    d->data = data;
    return d;
}
return NULL;
}
Dave
  • 71
  • 1
  • 10
  • 3
    Please post a [mcve]. Trust me, it will save a lot of wasted time on your part and our part. – kaylum Sep 12 '16 at 02:34
  • What is `BST_node`? Can you post the rest of the code to provide the full context? – John Sep 12 '16 at 02:34
  • 1
    `if(rootptr == NULL){ ... return rootptr; }` – BLUEPIXY Sep 12 '16 at 02:35
  • Like @BLUEPIXY said, or put an `else` in front of the `if (strcmp(rootptr->name1, d->name) < 0) {` line of code. Note that the test in `else if(strcmp(rootptr->name1, d->name)==0){` is superfluous; you previously established that it was neither less than nor greater than zero, so it must be equal to zero without running `strcmp()` a third time. Just use `else` (and maybe convert the condition into an assertion inside the `else` block). – Jonathan Leffler Sep 12 '16 at 02:43
  • @BLUEPIXY : That gave mea seg fault after reading two values. – Dave Sep 12 '16 at 02:44
  • Better, but an MCVE is a self-contained program that can be compiled and run. Your code is not yet an MCVE. Look at my code for an example of an MCVE for your code — it probably could be reduced, but not by much. – Jonathan Leffler Sep 12 '16 at 03:12

1 Answers1

3

This code works. The primary change is making sure that you return immediately after setting the root node. (Note that in your trace, you get told that the first node you insert is a 'duplicate' — that's because you don't do the early return.) I used an else on the subsequent if statement rather than add an extra return rootptr;, but that would also work.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct datatype_t
{
    char *name;
    char *data;
} datatype_t;

struct BST_node
{
    char *name1;
    char *data1;
    struct BST_node *left;
    struct BST_node *right;
};

struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d);

struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d)
{
    if (rootptr == NULL)
    {
        rootptr = (struct BST_node *)malloc(sizeof(struct BST_node));
        rootptr->name1 = d->name;
        rootptr->data1 = d->data;
        printf("%s 1\n", rootptr->name1);
        rootptr->left = rootptr->right = NULL;
    }
    else if (strcmp(rootptr->name1, d->name) < 0)
    {
        printf("%s left\n", rootptr->name1);
        rootptr->left = Insert(rootptr->left, d);
    }
    else if (strcmp(rootptr->name1, d->name) > 0)
    {
        printf("right\n");
        rootptr->right = Insert(rootptr->right, d);
    }
    else
    {
        assert(strcmp(rootptr->name1, d->name) == 0);
        printf("duplicate\n");
    }
    return rootptr;
}

static void BST_print_inorder(const char *tag, struct BST_node *node)
{
    if (tag != NULL)
        printf("%s:\n", tag);
    if (node != NULL)
    {
        BST_print_inorder(NULL, node->left);
        printf("%s (%s)\n", node->name1, node->data1);
        BST_print_inorder(NULL, node->right);
    }
}

int main(void)
{
    datatype_t earth[] =
    {
        { "Dave", "Studying at UCLA" },
        { "John", "Works at Google" },
        { "Mike", "School teacher" },
    };
    enum { NUM_LINES = sizeof(earth) / sizeof(earth[0]) };
    struct BST_node *root = NULL;
    for (int index = 0; index < NUM_LINES; index++)
    {
        if (root != NULL)
            printf("Root node: %s\n", (root->name1));
        root = Insert(root, &earth[index]);
        BST_print_inorder("Insert complete", root);
    }
    return 0;
}

Sample run:

Dave 1
Insert complete:
Dave (Studying at UCLA)
Root node: Dave
Dave left
John 1
Insert complete:
John (Works at Google)
Dave (Studying at UCLA)
Root node: Dave
Dave left
John left
Mike 1
Insert complete:
Mike (School teacher)
John (Works at Google)
Dave (Studying at UCLA)

You can't afford not to have a printing function, and you can't afford not to call it on each iteration of the inserting loop until you know everything is working.

Running with valgrind gives it a clean bill of health as far as memory access goes. I've not written the tree-free function.

Note that this code avoids having to worry about how the name and data values are allocated since they are part of an initialized array. However, in your 'real' code, you need to ensure that each name and data value is given its own memory.


Still crashing?

If you are still crashing after this, then you need to look hard at the read() code — or the other supporting code.

Note that read() is not a good function name to use on Unix-like systems; there is a read() system call too. The linker will sort things out for you so that the library code works, but if any of your code uses read() expecting the system call, it will be surprised to get your function instead.

Undefined behaviour from read()

The read() function has been posted as:

datatype_t *read(FILE* fp)
{
    char name[65];
    char data[1465];
    if (fscanf(fp, "%[^,] %[^\n]", name, data) == 2) {
        datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
        d->name = name;
        d->data = data;
        return d;
    }
    return NULL;
}

You're returning an allocated data structure, but the two strings contained in the structure are pointers to the local variables name and data and they are no longer valid once the function exits. You need to duplicate the strings.

Also, the fscanf() format is a bit curious. It doesn't read the comma separately, so the comma is parsed as part of the second scanset. The blank in the format means 'skip optional white space', of course. So, when the first scanset stops at the comma, the blank doesn't change anything, and then the data up to the newline, starting with the comma, is read into the data. Also, the second and subsequent names start with a newline. You need a format string like:

" %[^,], %[^\n]"

The scansets, %c and %n conversion specifications are the only ones that don't skip leading white space.

For example:

datatype_t *read(FILE* fp)
{
    char name[65];
    char data[1465];
    if (fscanf(fp, " %[^,], %[^\n]", name, data) == 2) {
        datatype_t *d = (datatype_t*)malloc(sizeof(datatype_t));
        d->name = strdup(name);
        d->data = strdup(data);
        return d;
    }
    return NULL;
}

(Note: trailing space in a scanf()-family format string intended for interactive use is a usability disaster — don't use it, ever.)

Revised main using modified read():

int main(void)
{
    datatype_t *earth;
    struct BST_node *root = NULL;
    while ((earth = read(stdin)) != NULL)
    {
        if (root != NULL)
            printf("Root node: %s\n", (root->name1));
        root = Insert(root, earth);
        BST_print_inorder("Insert complete", root);
    }

    BST_free(root);
    return 0;
}

And BST_free() is:

static void BST_free(struct BST_node *node)
{
    if (node != NULL)
    {
        BST_free(node->left);
        BST_free(node->right);
        free(node->name1);
        free(node->data1);
        free(node);
    }
}

There was a memory leak in the previous code for Insert() — actually, two of them. One is for each added node; the datatype_t structure is not freed when it should be (but the members are still in use; they've been transferred to the tree). The other is when there's a duplicate found; then the new data members need to be freed too.

I've also revised main() again to simulate what the code in the question does.

struct BST_node *Insert(struct BST_node *rootptr, datatype_t *d)
{
    if (rootptr == NULL)
    {
        rootptr = (struct BST_node *)malloc(sizeof(struct BST_node));
        rootptr->name1 = d->name;
        rootptr->data1 = d->data;
        rootptr->left = rootptr->right = NULL;
        printf("%s 1\n", rootptr->name1);
        free(d);
    }
    else if (strcmp(rootptr->name1, d->name) < 0)
    {
        printf("%s left\n", rootptr->name1);
        rootptr->left = Insert(rootptr->left, d);
    }
    else if (strcmp(rootptr->name1, d->name) > 0)
    {
        printf("right\n");
        rootptr->right = Insert(rootptr->right, d);
    }
    else
    {
        assert(strcmp(rootptr->name1, d->name) == 0);
        free(d->name);
        free(d->data);
        free(d);
        printf("duplicate\n");
    }
    return rootptr;
}

...

int main(void)
{
    struct BST_node *root = NULL;

    enum { MAX_NUM_LINE = 1000 };

    datatype_t **earth = (datatype_t **)malloc(MAX_NUM_LINE * sizeof(datatype_t *));
    assert(earth != NULL);

    datatype_t *earth_one;
    size_t index = 0;

    while ((earth_one = read(stdin)) != NULL)
    {
        earth[index] = earth_one;

        if (root != NULL)
            printf("Root node: %s\n", (root->name1));
        root = Insert(root, earth[index]);
        BST_print_inorder("Insert complete", root);
        index++;
        assert(index < MAX_NUM_LINE);
    }
    printf("%zu nodes\n", index);

    BST_free(root);
    free(earth);

    return 0;
}

This compiles cleanly under GCC 6.1.0 on Mac OS X 10.11.6 using:

$ gcc -O3 -g -std=c11 -Wall -Wextra -Werror -Wmissing-prototypes -Wstrict-prototypes \
>     -Wold-style-definition ll23.c -o ll23 
$

It runs cleanly under valgrind.

I have used two data files:

data.1:

Dave, Studying at UCLA
John, Works at Google
Mike, School teacher

data.2:

Dave, Studying at UCLA
John, Works at Google
Mike, School teacher
Jane, CEO of Humdinger Enterprises
Anne, Chief Dogsbody
Beryl, Tech Lead at Apple
Mina, Doctor at Kaiser Permanente
Dave, Transfer PhD to UCB
Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Hey Jonathan, thanks so much for the code, although Im still stuck on the seg faults. any idea why? – Dave Sep 12 '16 at 04:00
  • I guess it is related to the memory management for your `earth` array. I'm not clear why you need both the tree and the array. You normally use one or the other but not both. I'll look a bit harder when I get back from walking the dog. – Jonathan Leffler Sep 12 '16 at 04:05
  • Hi, I tracked down the error and it looks like that fscanf line doesn't scan any values. I tried with the comma that you specified earlier but its still throwing me that error! – Dave Sep 12 '16 at 05:18
  • One detail that I notice is that your sample data has double quotes around the fields in the file, but not in the output of the program. Is there any significance to this? I'm about to add another new `main()` and a mildly revised `Insert()` (that ensures memory is properly freed), but that works clean for me. – Jonathan Leffler Sep 12 '16 at 05:40
  • Oh no there is no significance to the double quote, we dont have to include them. I'll try to modify my code accordingly and get back to you. Thanks again :-) – Dave Sep 12 '16 at 05:51