0

im trying to find out the number of different words of a text in a file, using dynamic memory allocation. however, i dont get the right results. the text can contain punctuation. the program is below:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int different_words(FILE *fp);

int main(int argc, char *argv[]) {

    FILE *fp;

    different_words(fp);
    return 0;
}

int different_words(FILE *fp) {

    int i,j,ic=0,sum=0,sum2=0;
    char d[100];
    char **A;

    fp=fopen("file.txt","rt");
    if ((fp = fopen("file.txt", "rt"))== NULL) { //opening the file
        printf("cannot read file\n");
        exit(1);
    }
    while (fscanf(fp,"%s",&d)!=EOF)
        sum++;
    A=(char**)malloc(sum*sizeof(char*)); //allocate memory for all the words 
    if (A==NULL) {
        fclose(fp);
        return 0;
    }
    rewind(fp);
    while(fscanf(fp,"%s",&d)!=EOF){
        if (strchr("!?.,:",d[strlen(d)-1])==0) //edit
            A[ic]=(char*)malloc(strlen(d)+1); 
        else 
            A[ic]=(char*)malloc(strlen(d));
        if (A[ic]==NULL) {
            fclose(fp);
            return 0;
        }
        if (strchr("!?.,:",d[strlen(d)-1])!=0)
            for (j=0;j<=strlen(d)-2;j++)
                A[ic][j]=d[j];
        else
            strcpy(A[ic],d);
        if (++ic==sum)
            break;
    }
    for (i=0;i<sum;i++){
        for (j=0;j<i;j++){
                if (strcmp(A[i],A[j])==0) 
                        break;
        }
        if (i==j) {
                sum2++; //finding the number of different words in the text
        }
    }
    printf ("Number of different words in the text: %d\n",sum2); 
    return sum2;
}


----------
  • why are you `fopen`ing twice? Also, what is the meaning of the `t` parameter you're passing to `fopen`? I'm not seeing a `t` option on the man page. – yano May 03 '16 at 16:29
  • thanks but now i dont get the right result. check the code above – Stamatis Papadopoulos May 03 '16 at 17:25
  • What's your input? What result do you get? – yano May 03 '16 at 17:59
  • @yano i create a file: My name is Stamatis Papadopoulos im nineteen years old My name is, Stamatis Papadopoulos! im nineteen years old. And i get result 10 instead of 9. generally when i put punctuation i dont get right result – Stamatis Papadopoulos May 03 '16 at 18:03

1 Answers1

0

The problem is here:

if (A=NULL) {
    fclose(fp);
    return 0;
}

You're assigning A to NULL instead of checking it for NULL. Change it to

if (A==NULL) {
    fclose(fp);
    return 0;
}

Also, the consensus is you shouldn't cast the return value of malloc and you should also take a look at this regarding reading data from files.

Community
  • 1
  • 1
yano
  • 4,827
  • 2
  • 23
  • 35
  • thanks but now i dont get the right result. check the code above – Stamatis Papadopoulos May 03 '16 at 17:26
  • @StamatisPapadopoulos When there is punctuation in a word, you are not null terminating the string when you copy it to your `A` word list. This will cause the `strcmp` to not perform as you expect. Inside the `if` statement, after the `for` loop, add `A[ic][j] = '\0';`. Make sure you add brackets to your `if` statement. – yano May 03 '16 at 18:27
  • thanks mate :D ! but when i dont use dynamic memory allocation and i have a static array why i dont have to do this? – Stamatis Papadopoulos May 03 '16 at 18:43
  • @StamatisPapadopoulos You do. All strings in `C` must be null-terminated with `'\0'`. If you're not doing this with your static array and things still work, then you're just getting lucky. Maybe your compiler is zeroing the static memory, but I don't think that is a mandate of the `C` standard, so do NOT count on that behavior. When dealing with strings in `C`, make sure you have enough bytes for the characters in the string plus 1 for the `'\0'`, and make sure all strings are terminated with `'\0'`. Anything else is undefined behavior. – yano May 03 '16 at 18:48
  • but why i dont do this inside the else too? – Stamatis Papadopoulos May 03 '16 at 18:58
  • @StamatisPapadopoulos Actually I was wrong. Looks like static memory _is_ initialized to 0, per the `c` standard: http://stackoverflow.com/questions/1414215/initial-value-of-int-array-in-c and http://stackoverflow.com/questions/3373108/why-are-static-variables-auto-initialized-to-zero – yano May 03 '16 at 19:01
  • @StamatisPapadopoulos Because in the `else` you're calling the `strcpy` function. This copies the text of the string plus the null byte as stated in its man page: http://linux.die.net/man/3/strcpy – yano May 03 '16 at 19:03
  • @StamatisPapadopoulos heh,, thanks,, but trust me, there is an upper echelon of people on this site that I am nowhere near. I come here for learning, it's a great site! – yano May 03 '16 at 19:08
  • that's true! i'm 1st age undergraduate in electrical & computer engineering in university of patras in greece and we have one task in programming principles in c :) – Stamatis Papadopoulos May 03 '16 at 19:13
  • @StamatisPapadopoulos Cool! Computer engineering is a frustrating and difficult, yet worthwhile degree. I wish you well on your quest. – yano May 03 '16 at 19:27