I wrote a binary search tree to store some sorted words. As is often the practice, I do this by allocating new block of memory for the binary tree every time a new word come in. But, strangely, I can only allocating new memory for the binary search tree twice, which means that at the first and second time everything was fine but the program crash at the third memory allocation.
Here is my code:
inputWord.c
/* I pass in the firstNode, and the word I wanna store, and its quantity as argument*/
int inputWord(BSTnode* Node,char* word,int num){
BSTnode* ptr=Node; //ptr was defined to track the location of the node.
while(1){
if(stricmp(word,ptr->data)>0){
/*If the current node already have a rightchild then ptr move to it, and do comparison again*/
if(ptr->rightchild!=NULL){
ptr=ptr->rightchild;
printf("Moving to another (right) node now!!\n");
continue;
}
/*If the current node have no rightchild, then make a new one for it and store the word and its quantity*/
else{
ptr->rightchild=malloc(sizeof(BSTnode));
if(!(ptr->rightchild))
return 1;
ptr=ptr->rightchild;
ptr->rightchild=NULL;
ptr->leftchild=NULL;
strcpy(ptr->data,word);
ptr->num=num;
break;
}
}
else if(stricmp(word,ptr->data)<0){
/*it's all the same as the rightchild part*/
if(ptr->leftchild!=NULL){
ptr=ptr->leftchild;
continue;
}
else{
ptr->leftchild=malloc(sizeof(BSTnode));
if(!(ptr->leftchild))
return 1;
ptr=ptr->leftchild;
ptr->leftchild=NULL;
ptr->rightchild=NULL;
strcpy(ptr->data,word);
ptr->num=num;
break;
}
}
/*If the word have already been stored in the tree, print out this message*/
else{
fprintf(stdout,"It is exactly the same word!!\n");
return 0;
}
}
return 0;
}
I have make some necessary comments above to help you understand my intention.Hopefully that would help.
As you can see, that function was pretty straight and simple. And it did work for the first two invokation.But it crash when invoked the third time!!(always the third time).
So I made some test. And now I am pretty sure that it crash at the line
ptr->leftchild=malloc(sizeof(BSTnode));
(make it clear that the data offirstNode
is initialize with ""
for comparison. And I pass in the word "The
" first and "Project
" second and "Gutenberg
" third. And the structure of BSTnode
is
typedef struct BSTnode{
char data[20];
struct BSTnode* leftchild;
struct BSTnode* rightchild;
int num;
}BSTnode;
)
How I make that test is listed as below. (It is the same code, only with some extra print
statement for test)
int inputWord(BSTnode* Node,char* word,int num){
printf("Enter inputWord() successfully!!\n");
BSTnode* ptr=Node;
while(1){
if(stricmp(word,ptr->data)>0){
if(ptr->rightchild!=NULL){
ptr=ptr->rightchild;
printf("Moving to another (right) node now!!\n");
continue;
}
else{
printf("I need a new rightchild!!\n");
ptr->rightchild=malloc(sizeof(BSTnode));
printf("New rightchild created successfully!!\n");
if(!(ptr->rightchild))
return 1;
ptr=ptr->rightchild;
ptr->rightchild=NULL;
ptr->leftchild=NULL;
printf("......In line 27 now!!\n");
strcpy(ptr->data,word);
printf("Copied successfully!!!..In line 29 now!!\n");
ptr->num=num;
fprintf(stdout,"New data '%s' successfully inserted into a new (right) node at %p (value of pointer)\n",word,ptr);
break;
}
}
else if(stricmp(word,ptr->data)<0){
if(ptr->leftchild!=NULL){
ptr=ptr->leftchild;
printf("Moving to another (left) node now!!\n");
continue;
}
else{
printf("I need a new left child!!!\n");
ptr->leftchild=malloc(sizeof(BSTnode));
printf("New leftchild created successfully!!\n");
if(!(ptr->leftchild))
return 1;
ptr=ptr->leftchild;
ptr->leftchild=NULL;
ptr->rightchild=NULL;
printf("......In line 47 now!!\n");
strcpy(ptr->data,word);
printf("Copied successfully!!!..In line 51 now!!\n");
ptr->num=num;
fprintf(stdout,"New data '%s' successfully inserted into a new (left) node at %p (value of pointer)\n",word,ptr);
break;
}
}
else{
fprintf(stdout,"Nothing else to insert!!\n");
return 0;
}
}
return 0;
}
As you can see, with some print
statements telling me where have I been, I can be sure where the program crash.
Any idea why it always crash at the third time?
#######################################################################3main.c
#include<stdlib.h>
#include<stdio.h>
#include<string.h>
#include<stdbool.h>
#include "wordCount.h"
void prompt(BSTnode*,FILE*);
char arr[20]={0};
int main()
{
BSTnode* firstNode=malloc(sizeof(BSTnode));
firstNode->leftchild=NULL;
firstNode->rightchild=NULL;
strcpy(firstNode->data,"");
firstNode->num=0;
FILE* fs=fopen("testfile.txt","r");
if(!fs){
printf("Failed to open fiel!!\n");
return 2;
}
while(1){
if(ferror(fs))
perror("there is a error in fs in the beginning of while loop!\n");
prompt(firstNode,fs);
}
return 0;
}
void prompt(BSTnode* Node,FILE* fs){
int i=0;
printf("Please select\n1.find and input a word into the binary tree\n2.print only one data\n3.Exit\n");
if(scanf("%d",&i)!=1){
printf("scanf failed!!\nplease input a valid number!!\n");
//fflush(stdin);
return;
}
getchar();
switch(i){
case 1:
{
memset(arr,'\0',20); //since the "arr" is used to hold the newWord founded and returned, it should be clear first every time
char* newWord=findWord(fs);
int totalNumberOfTheWord=wordCount(fs,newWord);
inputWord(Node,newWord,totalNumberOfTheWord);
break;
}
case 2:
printOneNode(Node);
break;
case 3:
exit(0);
default:
printf("Please input a valid number!(1-3)");
}
}
Also, the wordCount.h:
#ifndef WORDCOUNT_H
#define WORDCOUNT_H
#include<stdlib.h>
#include<stdio.h>
typedef struct BSTnode{
char data[20];
struct BSTnode* leftchild; //if less than, put it on the left
struct BSTnode* rightchild; //if greater than, on the right
int num;
}BSTnode;
int inputWord(BSTnode*,char*,int);
char* findWord(FILE*);
int wordCount(FILE*,char*);
int printOneNode(BSTnode*);
#endif
The function prompt()
is used to prompt the user to decide whether to continue word-searching.
full source code:
wordCount.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include "wordCount.h"
int wordCount(FILE* fs,char* word)
{
int num=0;
rewind(fs);
size_t n1=sizeof(word);
size_t n2=strlen(word);
char* buff=malloc(n1) ;
if(buff==NULL)
return 1;
memset(buff,'\0',n1);
/* I count the word by moving byte by byte and do comparison*/
if (fs != NULL) {
if (n2 == fread(buff, 1,n2, fs)) {
do {
if (strnicmp(buff,word,n2) == 0)
num++;
memmove(buff, buff+1,n2-1);
} while (1 == fread(buff+n2-1, 1, 1, fs));
// I think I might optimize
// this using KMP algorithm
}
}
free(buff);
return num;
}
findWord.c
#include<string.h>
#include<stdio.h>
#include<stdbool.h>
#include<stdlib.h>
#include "wordCount.h"
extern char arr[20];
char* findWord(FILE* fs)
{
static long pos=0;
fseek(fs,pos,SEEK_SET);
if(ferror(fs)){
perror("fseek() failed!!!\n");
fprintf(stderr,"fseek() failed in file %s\n",__FILE__);
exit(EXIT_FAILURE);
}
char chr[1]={0};
bool flag1=false;
bool flag2=false;
while((1==fread(chr,1,1,fs))&&(!(flag1==false&&flag2==true))){
// This would make the findword() function
// find only a single word once
if(chr[0]!=32){
strncat(arr,chr,1);
flag2=true;
flag1=true;
}
else
flag1=false;
}
/*the key method that I use to find a new word is that I use two 'bool' flags: flag1 and flag2.
*Only when the "arr" is filled only with character, not a single space, will the flag1 be false and flag2 be true, thus breaking the while loop*/
pos=ftell(fs)-1;
//maybe everytime you use "fseek()", "ftell()", the
//file-position will move one byte ahead.
return arr;
}
printOneNode.c
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include"wordCount.h"
int printOneNode(BSTnode* Node){
BSTnode* ptr=Node;
while(1){
printf("Select which side of node do you want to print now(l/r)?(q for quit) ");
char a;
getchar(); //this is used to consume the newline character left
//fflush(stdin);
if(scanf("%c",&a)!=1){
printf("scanf failed!!");
return 1;
}
switch(a){
case 'l':
{
if(ptr->leftchild!=NULL){
ptr=ptr->leftchild;
printf("\t%s\n",ptr->data);
}
else
printf("There is no more leftchild\n");
break;
}
case 'r':
{
if(ptr->rightchild!=NULL){
ptr=ptr->rightchild;
printf("\t%s\n",ptr->data);
}
else
printf("There is no more rightchild!\n");
break;
}
case 'q':
return 0;
default:
return 0;
}
}
}
The function findWord()
is used to find a new word for insertion. For example, if there is string This is a lovely place...
in the textfile.txt
, then the findWord()
would first find out a word This
and then is
secondly and then a
thirdly, etc. (This is the reason why I define the pos
as a static variable to keep track of the location.)
The function wordCount()
is used to count out how many time those the word returned by findWord()
appear in the testfile.txt
.
The function printOneNode()
is used to print out the data of one single node according to the user's willingness. I designed this function but haven't use it yet, which mean that in the prompt()
function I always choose to "find and input a new word into the binary search tree"). So this may not the reason that cause my program to crash "occasionally".
As summary, my routine is:
- prompt the user asking whether to find and insert a new word( always yes)
- find a new word in the
testfile.txt
usingfindWord()
- count the number using
wordCount()
- insert it into the binary search tree using
inputWord()
Repeat that.
I cannot make this program smaller any more to make it more understandable, because it have to find a word and count it insert it. But you can ignore that printOneNode()
function, to some extent.
As for the testfile.txt
, I have posted the link below at the comment area. Thanks