0

Welcome everybody. I am new to Stackoverflow, I code in C for some time. I have run to a problem writing a program counting word occurrences in a text file. I need to have an output telling what word occurred how many times. Here is the source code:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int new_words=0;
int nwords=0;

typedef struct element{
    char word[30];
    int how_many;
} element;

int is_word_new(element ** dictionary, char * string)
{
    for (int i =0; i<new_words; i++)
    {
        if (strcmp(string, dictionary[i]->word)==0)
            return 0;
    }
    return 1;
}
int which_word(element ** dictionary, char * string)
{
    for (int i =0; i<new_words; i++)
    {
        if (strcmp(string, dictionary[i]->word)==0)
            return i;
    }
    return 0;
}

int main()
{
    FILE * fp;
    char word[30];


    fp=fopen("input.txt", "r");
    if (fp==NULL)
    {
        printf("FILE ERROR");
        return 0;
    }


    while(!feof(fp))
    {
        fscanf(fp, "%s",word);
        nwords++;
    }
    nwords--;
    rewind(fp);

    struct element * dictionary = (element*)malloc(sizeof(element)*nwords);

    for (int i =0; i<nwords; i ++)
    {
        fscanf(fp, "%s", word);

        if( is_word_new(&dictionary, word) )
        {
            strcpy(dictionary[new_words].word, word);
            //dictionary[new_words].word= word;
            dictionary[new_words].how_many=1;
            new_words++;
        }
        else
            dictionary[which_word(&dictionary, word)].how_many++;
        word[0]='\0';
    }

    printf("\n\nFinal dictionary\n with %d words", new_words);
    for (int i =0; i<new_words; i++)
    {
        printf("%s %d \n", dictionary[i].word, dictionary[i].how_many);     
    }

    free(dictionary);
    fclose(fp);
    return 0;
}

the idea is that i first count how many words are in the text (which somehow is always greater by one than in fact). The function is_word_new checks if a newly read word is already in the dictionary. which_word() tells which word was found

However I get a segmentation fault running this program. When I used the line which is commented // dictionary[i].word=word the program behaved as if there was only "word" in the dictionary.

Please give me hints where am I doing this stuff wrong

WhozCraig
  • 65,258
  • 11
  • 75
  • 141
Grzegorz
  • 17
  • 1
  • 3
  • 8
    I wish there were a program to count occurrences of this question on SO... – Eugene Sh. Jun 12 '15 at 17:04
  • `is_word_new(element ** dictionary...` : `element ** dictionary` meant {element *,element *,...}, `dictionary` is pointer to top. but actualy pointer to pointer to {element ,element ,...} – BLUEPIXY Jun 12 '15 at 17:23
  • @BLUEPIXY, I am not sure what you mean by the comment, but it's similar to what I have answered below, right? Sorry for asking, but I really want to take the refiner badge,so my answer has to be good to take at least one upvote. – gsamaras Jun 12 '15 at 17:40
  • @gsamaras sorry my bad english. My indications is that the actual as that is assumed is different. i agree that "no need to pass double pointer" – BLUEPIXY Jun 12 '15 at 17:59
  • Oh I see @BLUEPIXY, at least that comment might give me a chance for that badge (by making the question active). :P – gsamaras Jun 12 '15 at 18:26
  • See [`while (!feof(fp))` is always wrong](http://stackoverflow.com/questions/5431941/while-feof-file-is-always-wrong) explains why you get an off-by-one error from a loop such as yours. – Jonathan Leffler Jun 13 '15 at 02:47
  • possible duplicate of [Counting words in a file in C](http://stackoverflow.com/questions/29772650/counting-words-in-a-file-in-c) – David Hoelzer Jun 13 '15 at 15:45

1 Answers1

0

Must read question: Why is “while ( !feof (file) )” always wrong? Thanks to Jonathan Leffler's comment.


Please check my comments in the code below. I got you a start up for when the words are appearing once. I am letting the rest of the job for you, so that we can share the fun, but you can of course ask.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int new_words = 0;
int nwords = 0;

typedef struct element {
    char word[30];
    int how_many;
} element;

// no need to pass double pointer
int is_word_new(element* dictionary, char * string) {
    int i;
    for (i = 0; i < new_words; i++) {
        printf("|%s|, |%s|\n", string, dictionary[i].word);
        if (strcmp(string, dictionary[i].word) == 0)
            return 0;
        printf("i=%d\n",i);
    }
    return 1;
}

int which_word(element ** dictionary, char * string) {
    int i;
    for (i = 0; i < new_words; i++) {
        if (strcmp(string, dictionary[i]->word) == 0)
            return i;
    }
    return 0;
}

int main() {
    FILE * fp;
    char word[30];


    fp = fopen("test.txt", "r");
    if (fp == NULL) {
        printf("FILE ERROR");
        return 0;
    }

    printf("file read\n");

    int read_counter;
    while (!feof(fp)) {
        read_counter = fscanf(fp, "%s", word);
        // increment only if we really read something
        if(read_counter >= 0)
                nwords++;
    }
    // this is wrong, remove it
    //nwords--;
    rewind(fp);

    printf("nwords = %d\n", nwords);
    // do not cast what malloc returns. Also struct is not needed.
    element * dictionary = malloc(sizeof (element) * nwords);

    int i;
    for (i = 0; i < nwords; i++) {
        fscanf(fp, "%s", word);
        printf("read |%s|\n", word);
        if (is_word_new(dictionary, word)) {
            strcpy(dictionary[new_words].word, word);
            //dictionary[new_words].word= word;                     
            dictionary[new_words].how_many = 1;
            new_words++;
        } else {
            printf("bhka\n");
            dictionary[which_word(&dictionary, word)].how_many++;
        }
        //word[0] = '\0';
    }

    printf("\n\nFinal dictionary\n with %d words", new_words);
    for (i = 0; i < new_words; i++) {
        printf("%s %d \n", dictionary[i].word, dictionary[i].how_many);
    }

    free(dictionary);
    fclose(fp);
    return 0;
}

Here is the test.txt I used:

sam klouvi george dit epfl
ok
end
Community
  • 1
  • 1
gsamaras
  • 71,951
  • 46
  • 188
  • 305
  • Thank you for your help, it really worked. I don't know really why, but what helped was changing the double pointer to single pointer from 'dictionary' in what_word(). Now it works just as it was supposed to be. – Grzegorz Jun 14 '15 at 21:42