Adapting the code in the question (as of 2014-02-25 20:55 -08:00) into a program (add headers <assert.h>
, <ctype.h>
, <stdbool.h>
, <stdio.h>
, <stdlib.h>
, <string.h>
; add an assertion after the memory allocation, fix the use of fgets()
), and remove newlines from the data, and print the lines (words) as they're read, and valgrind
gives it a crash-free (but very leaky) run.
Code:
#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
static int wordCount = 0;
typedef struct node
{
bool isWord;
struct node *children[26];
} node;
static struct node *newNode(struct node *n)
{
n = (struct node *)malloc(sizeof(struct node));
assert(n != 0);
n->isWord = false;
for (int i = 0; i < 26; i++)
n->children[i] = 0;
return n;
}
static struct node *n = NULL;
static
void append(char *s)
{
struct node *m = NULL;
m = n;
for (int i = 0, p = strlen(s); i < p; i++)
{
if (m->children[s[i] - 'a'] == NULL)
m->children[s[i] - 'a'] = newNode(m->children[s[i] - 'a']);
m = m->children[s[i] - 'a'];
if (i == p - 1)
{
m->isWord = true;
wordCount++;
}
}
}
static
bool load(const char *dictionary)
{
FILE *f = fopen(dictionary, "r");
n = newNode(n);
if (f == NULL)
return false;
char s[100];
while (fgets(s, sizeof(s), f) != 0)
{
for (int i = 0, j = strlen(s); i < j; i++)
s[i] = tolower(s[i]);
s[strlen(s)-1] = '\0';
printf("[%s]\n", s);
append(s);
}
return true;
}
int main(void)
{
if (load("file"))
{
printf("File loaded OK\n");
return 0;
}
else
{
printf("Failed to load file\n");
return 1;
}
}
Data:
an
anne
apple
aardvark
appalachian
antelope
antediluvian
alabama
antidisestablishmentarianism
Output from valgrind
:
$ valgrind ./prefix
==29970== Memcheck, a memory error detector
==29970== Copyright (C) 2002-2012, and GNU GPL'd, by Julian Seward et al.
==29970== Using Valgrind-3.8.1 and LibVEX; rerun with -h for copyright info
==29970== Command: ./prefix
==29970==
[an]
[anne]
[apple]
[aardvark]
[appalachian]
[antelope]
[antediluvian]
[alabama]
[antidisestablishmentarianism]
File loaded OK
==29970==
==29970== HEAP SUMMARY:
==29970== in use at exit: 15,472 bytes in 70 blocks
==29970== total heap usage: 70 allocs, 0 frees, 15,472 bytes allocated
==29970==
==29970== LEAK SUMMARY:
==29970== definitely lost: 0 bytes in 0 blocks
==29970== indirectly lost: 0 bytes in 0 blocks
==29970== possibly lost: 0 bytes in 0 blocks
==29970== still reachable: 15,472 bytes in 70 blocks
==29970== suppressed: 0 bytes in 0 blocks
==29970== Rerun with --leak-check=full to see details of leaked memory
==29970==
==29970== For counts of detected and suppressed errors, rerun with: -v
==29970== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 2 from 2)
$
If you don't trim the newlines, you are going to be accessing memory out of bounds; that would be my best guess as to what goes wrong. It isn't clear-cut that's the problem. When I commented out the line that zaps the newline, I got a complaint about using uninitialized data in the line:
if (m->children[s[i] - 'a'] == NULL)
(Note: the assert
is a very crude way of detecting memory allocation problems; effective but very crude.)