-2

I am having trouble loading data into my trie structure. I keep getting a sgemenation fault. Does this have to do with my malloc? Anyone see any issues?

Thanks

/**
 * dictionary.c
 *
 * Computer Science 50
 * Problem Set 5
 *
 * Implements a dictionary's functionality.
 */

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "dictionary.h"


#define ASCII_OFFSET 97;

/** Node of the data structure used to store the dictionary key words
 * Data Structures Type is Tries
 * Includes a bool to indicate if the current node is the end of a word
 * A pointer to the nodes child node
 */
typedef struct node
{
    bool is_word;
    struct node* children[27];
}
node;
node* rootNode;
node* nextNode;

//Variable to track the size of the dictinoary
int wordCount = 0;

/**
 * Returns true if word is in dictionary else false.
 */

bool check(const char* word)
{
    //Get words length
    int wordLength = strlen(word);

    for(int i = 0; i < wordLength; i++)
    {
        //taking the character we need to check
        int charToCheck = tolower(word[i]) - ASCII_OFFSET;

        //Checking to see if the char exists in the data strucutre, Trie;
        if(nextNode->children[charToCheck] == NULL)
        {
            return false;
        }

        //Advance the next node down the trie
        nextNode = nextNode->children[charToCheck];
    }

    nextNode = rootNode;
    //Return what is_word return
    return nextNode->is_word;
}

/**
 * Loads dictionary into memory.  Returns true if successful else false.
 */

bool load(const char* dictionary)
{
    //Open dict.. file to read 
    FILE* file = fopen(dictionary,"r");

    //Check if the dict.. exsists!
    if(file == NULL)
    {
        return false;
    }

    //Creating the first node in our data strucutre
    rootNode = malloc(sizeof(node));
    nextNode = rootNode;

    //Get character to store
    int character = fgetc(file) - ASCII_OFFSET;

    //Go through the dict... file 
    while(character != EOF)
    {
        //Go through each word in the file
        while(character != '\n')
        {
            //Add into our data structure
            if(nextNode->children[character] == NULL)
            {
                //Create memory inorder to insert the next node
                nextNode->children[character] = malloc(sizeof(node));
                nextNode = nextNode->children[character];
            }else {
                nextNode = nextNode->children[character];
            }

            //advance character to next
            character = fgetc(file) - ASCII_OFFSET;
        }

        //advance character to next word
        character = fgetc(file) - ASCII_OFFSET;

        //Set the last node loaded to is_word to track the end of each word
        nextNode->is_word = true;
        wordCount++;
        nextNode = rootNode;
    }


    fclose(file);
    return true;
}

/**
 * Returns number of words in dictionary if loaded else 0 if not yet loaded.
 */

unsigned int size(void)
{
    return wordCount;
}

/**
 * Unloads dictionary from memory.  Returns true if successful else false.
 */

bool unload(void)
{
    for(int i = 0; i < 26; i++)
    {
        if(nextNode->children[i] != NULL)
        {
            nextNode = nextNode->children[i];
            unload();
        }
    }

    free(nextNode);
    return true;
}
Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
menan
  • 195
  • 1
  • 1
  • 8
  • 1
    Congratulations, you've pasted an assignment into the internet. What was your question? – Useless Aug 31 '16 at 13:59
  • Sorry, I was having trouble loading my trie, kept getting a segmentation error. – menan Aug 31 '16 at 14:00
  • What is a "trie"? (Ssh, don't help the OP!) – Koshinae Aug 31 '16 at 14:02
  • A [trie](https://en.wikipedia.org/wiki/Trie) is an interesting type of search tree. And, menan - if you have a segmentation error, you should either have a core dump you can load in a debugger, or you can just run the program in your debugger and see it stop at the point when the error occurs. Typically segv means you have a NULL pointer, or an uninitialized pointer, or some other horrible thing. – Useless Aug 31 '16 at 14:05
  • 1
    The semicolon in `#define ASCII_OFFSET 97;` is not wanted. But if the code compiles with it present, are you actually using `ASCII_OFFSET` at all? (Answer: yes, it is used, at least twice, but at the end of an expression so the extra semi-colon is just an empty statement.) Would it be clearer to use `#define ASCII_OFFSET 'a'`? – Jonathan Leffler Aug 31 '16 at 14:21
  • 2
    You don't properly initialize the allocated trie structure element. You need to make sure all those pointers are nulled. – Jonathan Leffler Aug 31 '16 at 14:24
  • How do I NULL these pointers, I have used malloc, and callo? – menan Aug 31 '16 at 14:30
  • @menan Use a `for` loop. – Klas Lindbäck Sep 01 '16 at 13:53

1 Answers1

1
  1. You don't check that a char is in range before using it as index in the array. Any character outside the range a-z will cause buffer overflow.

  2. You compare the character to known character constants after you have subtracted 97 from it.

  3. You need to initialize the memory returned from malloc by assigning NULL to all array elements and set is_word to false.

Just took a short glance at the code, so I may have missed additional bugs.

Klas Lindbäck
  • 33,105
  • 5
  • 57
  • 82
  • You should also use calloc instead of malloc because you check for NULL but malloc doesn't give you a defined memory value. – maxbit89 Sep 01 '16 at 13:14
  • 2
    @maxbit89 You raise a valid point. I don't think `NULL` is required to be `0` by the standard (even though "all" implementations seem to have it as `0`), so I've chosen to recommend explicitly setting the pointers to `NULL` instead. – Klas Lindbäck Sep 01 '16 at 13:53
  • Yes perse it is not defined in Standard but it is well used ^^. See also: http://stackoverflow.com/questions/9894013/is-null-always-zero-in-c – maxbit89 Sep 04 '16 at 14:45