3

I'm trying to write a function to input words from a text file into a prefix tree, but it keeps giving me segmentation fault

int wordCount = 0;
typedef struct node
{
    bool isWord;
    struct node* children[26];
}node;
struct node* newNode(struct node* n)
{
    n = (struct node*)malloc(sizeof(struct node));
    n->isWord = false;
    for (int i = 0; i < 26; i++)
        n->children[i] = 0;    
    return n;
}
struct node* n = NULL;

void append(char* s)
{
    struct node* m = NULL;
    m = n;
    for (int i = 0, p = strlen(s); i < p; i++)
    {
        if (m->children[s[i] - 'a'] == NULL)
            m->children[s[i] - 'a'] = newNode(m->children[s[i] - 'a']);
        m = m->children[s[i] - 'a'];
        if (i == p - 1)
        {
            m->isWord = true;
            wordCount++;
        }            
    }              
}
bool load(const char* dictionary)
{
    FILE* f = fopen(dictionary, "r");
    n = newNode(n);
    if (f == NULL)
        return false;
    char s[100];
    while (f != NULL && !feof(f))
    {
        fgets(s, sizeof(s), f);       
        for (int i = 0, j = strlen(s); i < j; i++)
            s[i] = tolower(s[i]);  
        append(s);
    }
    return true;
}

After some testing, I'm pretty sure the problem is the append function.

Ryan Aleksander
  • 423
  • 5
  • 18
  • Could you expound on the why your testing makes you suspect the `append()` function? Don't make us guess. :) – Travis Griggs Feb 26 '14 at 04:45
  • 1
    Read [`while (!feof(file))` is always wrong](http://stackoverflow.com/questions/5431941/while-feof-file-is-always-wrong). Have you printed the data as you read it (what does `fgets()` read each time)? Have you checked for hyphens, spaces, apostrophe's in the dictionary? Did you remember to remove the newline from the end of each line? – Jonathan Leffler Feb 26 '14 at 04:52
  • Whenever I comment out the append(s) line, and the segmentation fault is gone. I haven't checked whether the function works properly or not yet, since I have yet to make it work. – Ryan Aleksander Feb 26 '14 at 23:27
  • Thanks Jonathan, I found the problem! There are words with apostrophe in the dictionary file, and I forgot to make room for them... – Ryan Aleksander Feb 27 '14 at 00:57

1 Answers1

1

Adapting the code in the question (as of 2014-02-25 20:55 -08:00) into a program (add headers <assert.h>, <ctype.h>, <stdbool.h>, <stdio.h>, <stdlib.h>, <string.h>; add an assertion after the memory allocation, fix the use of fgets()), and remove newlines from the data, and print the lines (words) as they're read, and valgrind gives it a crash-free (but very leaky) run.

Code:

#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int wordCount = 0;

typedef struct node
{
    bool isWord;
    struct node *children[26];
} node;

static struct node *newNode(struct node *n)
{
    n = (struct node *)malloc(sizeof(struct node));
    assert(n != 0);
    n->isWord = false;
    for (int i = 0; i < 26; i++)
        n->children[i] = 0;
    return n;
}

static struct node *n = NULL;

static
void append(char *s)
{
    struct node *m = NULL;
    m = n;
    for (int i = 0, p = strlen(s); i < p; i++)
    {
        if (m->children[s[i] - 'a'] == NULL)
            m->children[s[i] - 'a'] = newNode(m->children[s[i] - 'a']);
        m = m->children[s[i] - 'a'];
        if (i == p - 1)
        {
            m->isWord = true;
            wordCount++;
        }
    }
}

static
bool load(const char *dictionary)
{
    FILE *f = fopen(dictionary, "r");
    n = newNode(n);
    if (f == NULL)
        return false;
    char s[100];
    while (fgets(s, sizeof(s), f) != 0)
    {
        for (int i = 0, j = strlen(s); i < j; i++)
            s[i] = tolower(s[i]);
    s[strlen(s)-1] = '\0';
    printf("[%s]\n", s);
        append(s);
    }
    return true;
}

int main(void)
{
    if (load("file"))
    {
        printf("File loaded OK\n");
        return 0;
    }
    else
    {
        printf("Failed to load file\n");
        return 1;
    }
}

Data:

an
anne
apple
aardvark
appalachian
antelope
antediluvian
alabama
antidisestablishmentarianism

Output from valgrind:

$ valgrind ./prefix
==29970== Memcheck, a memory error detector
==29970== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==29970== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==29970== Command: ./prefix
==29970== 
[an]
[anne]
[apple]
[aardvark]
[appalachian]
[antelope]
[antediluvian]
[alabama]
[antidisestablishmentarianism]
File loaded OK
==29970== 
==29970== HEAP SUMMARY:
==29970==     in use at exit: 15,472 bytes in 70 blocks
==29970==   total heap usage: 70 allocs, 0 frees, 15,472 bytes allocated
==29970== 
==29970== LEAK SUMMARY:
==29970==    definitely lost: 0 bytes in 0 blocks
==29970==    indirectly lost: 0 bytes in 0 blocks
==29970==      possibly lost: 0 bytes in 0 blocks
==29970==    still reachable: 15,472 bytes in 70 blocks
==29970==         suppressed: 0 bytes in 0 blocks
==29970== Rerun with --leak-check=full to see details of leaked memory
==29970== 
==29970== For counts of detected and suppressed errors, rerun with: -v
==29970== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
$

If you don't trim the newlines, you are going to be accessing memory out of bounds; that would be my best guess as to what goes wrong. It isn't clear-cut that's the problem. When I commented out the line that zaps the newline, I got a complaint about using uninitialized data in the line:

        if (m->children[s[i] - 'a'] == NULL)

(Note: the assert is a very crude way of detecting memory allocation problems; effective but very crude.)

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278