String with count in a file getting the top 10 counted words

Question

I'm trying to write a C program that reads the file out3.dat.txt and prints the ten most highly ranked keywords together with their counts.

For example:

Sample out3.dat.txt file

word, 2
wordtwo, 1
wordthree, 2
wordfour, 3

Expected Output

wordfour, 3
word, 2
wordthree, 2
wordtwo, 1

My approach is to sort the counts for the keywords in decreasing order and then locate the respective keywords corresponding to the highest counts. I just want to assume that the ranking is only by the count occurrence (i.e. keywords with the same counts have the same rank regardless of their alphabetical ordering).

This is what I have attempted and struggled with:

#include <stdio.h>
#include <string.h>

/*
 * Asker's attempt: loads up to 42 lines of out3.dat.txt into `keywords`, then
 * tries to print the entries with the highest counts by comparing the
 * character that follows the space on one line with the same position on the
 * next line.
 *
 * NOTE(review): as written the second half cannot produce a ranking --
 * see the notes inline. Nothing is ever written to `outp`.
 */
int main(void) {
    FILE *inp;
    FILE *outp;
    /* NOTE(review): `n` is never assigned before it is read and incremented below. */
    int n, i, j;
    /* NOTE(review): `item` and `item2` are only assigned inside conditionals,
       yet they are compared on every inner-loop iteration. */
    char item, item2, keywords[42][20], word[20];

    /* Neither fopen() result is checked for NULL before use. */
    inp = fopen("out3.dat.txt", "r");
    outp = fopen("out4.dat.txt", "w");

    /* feof() only becomes true after a read has already failed, and the
       fgets() return value is ignored, so rows past the end of the file are
       processed without having been filled in. */
    while (!feof(inp)) {
        for (i = 0; i < 42; ++i) {
            fgets(keywords[i], 20, inp);
            /* Strip the trailing newline that fgets() keeps.
               NOTE(review): for an empty string `l` becomes -1 and the index
               below falls outside the row. */
            int l = strlen(keywords[i]) - 1;
            if (keywords[i][l] == '\n')
                keywords[i][l] = '\0';
        }
    }
    //V--------below is the issue but I cant seem to figure it out-----V
    
    for (i = 0; i < 42; ++i) {
        strcpy(word, keywords[i]);

        /* Scans every column of the line; the comparison and the printf()
           at the bottom run once per column, not once per line. */
        for (j = 1; j < 20; ++j) {
            if (word[j] == ' ') {
                ++j;
                //printf("%c\n", words[i][j]);
                /* Only the single character after the space is kept, i.e. the
                   first digit of the count rather than the whole number. */
                item = word[j];
                //printf("%c\n", item);
                --j;
            }

            /* Temporarily step to the next line.
               NOTE(review): when i == 41 this indexes keywords[42], one past
               the end of the array. */
            ++i;

            /* The space is looked for at the same column `j` as in the
               current line, so it is only found when both words have the
               same length. */
            if (keywords[i][j] == ' ') {
                ++j;
                item2 = keywords[i][j];
                --j;
            }

            --i;

            /* Prints the current line when its digit is >= the next line's
               digit, up to ten times in total (subject to the notes on `n`,
               `item` and `item2` above). No sorting takes place. */
            if (item >= item2 && n < 10) {
                printf("%s\n", word);
                n++;
            }
            //printf("%d\n", r);
        }
    }

    fclose(inp);
    fclose(outp);

    return (0);
}

[`while (!feof(file)) is always wrong`](https://stackoverflow.com/questions/5431941/why-is-while-feof-file-always-wrong) — pmg, Jun 05 '21 at 09:58

score 0 · Answer 1 · answered Jun 06 '21 at 08:46

Sample code :

 #include <stdio.h>
#include <stdlib.h>
#include <string.h>

//Limitation :
//      Character in single line is maximum 100
//      Assumes input file always has '<word> <rank>'
//      Maximum lines in the input documents limited to 50 -
//      Output is on console

/* Size of the line buffer and of the per-entry word storage, in bytes. */
#define WORD_LEN    100
/* Capacity of the entry table, i.e. the most lines that will be read. */
#define NUM_KEYWORD 50

/* One parsed input line: the keyword text and the number that followed it. */
typedef struct WordRank_s WordRank_t;

struct WordRank_s
{
    int  Rank;              /* numeric value parsed from the line   */
    char Word[WORD_LEN];    /* NUL-terminated keyword from the line */
};


/*
 * Reads up to NUM_KEYWORD lines of the form "<word> <rank>" from the input
 * file, echoes them, sorts them by rank and prints them from highest rank to
 * lowest on the console.
 *
 * Returns 0 on success, 1 if the input file cannot be opened.
 *
 * Every entry that was read is printed, not only the first ten. Reading stops
 * at the first line that contains no space or at end of file.
 */
int main(void)
{
    FILE *inp;
    WordRank_t  WordRank[NUM_KEYWORD], Temp;
    char word[WORD_LEN] = {0};

    int i, LineCount = 0,step,j;

    /* The backslash must be escaped: "\I" is not a valid escape sequence. */
    inp = fopen("D:\\Input.txt", "r");          //Provide the path of your input file

    if(NULL == inp)
        return 1;                               //failure to open input file

    for(i = 0; i < NUM_KEYWORD; ++i)
    {
        if(fgets(word, WORD_LEN, inp) != NULL)
        {
            /* Split at the first space: the text before it is the keyword
               (any trailing comma stays part of it), the text after it is
               the rank. */
            char *p =  strchr(word, ' ');
            if(NULL != p)
            {
                *p = '\0';
                /* Cannot overflow: both buffers are WORD_LEN bytes. */
                strcpy(WordRank[i].Word, word);
                WordRank[i].Rank = atoi((p+1));
            }
            else
                break;
            LineCount++;
        }
        else
            break;
    }
    fclose(inp);

    printf("\n\n -------- Printing input ------- \n");
    for(i = 0; i < LineCount; ++i)
        printf("%s %d\n", WordRank[i].Word,WordRank[i].Rank);

    //Sort as per Rank (ascending bubble sort; equal ranks keep their input order)
    for(step = 0; (step < LineCount-1) ; ++step)
    {
        for(j=0; j < LineCount-step-1; ++j)
        {
            if(WordRank[j].Rank > WordRank[j+1].Rank)
            {
                /* Plain struct assignment swaps the whole entry. */
                Temp = WordRank[j];
                WordRank[j] = WordRank[j+1];
                WordRank[j+1] = Temp;
            }
        }
    }

    /* Walk the ascending array backwards to list the highest rank first.
       Entries with equal rank therefore appear in reverse input order. */
    printf("\n\n -------- Printing in order------- \n");
    for(i = LineCount -1; i >= 0; --i)
        printf("%s %d\n", WordRank[i].Word,WordRank[i].Rank);

    return(0);
}

Sample Output :

-------- Printing input -------
word, 2
wordtwo, 1
wordthree, 2
wordfour, 3
wordfive, 4
words, 8
wordn, 10
asfdf, 5
dfasdf 7
sadfasdf 9
sfdasdf3 2
asdfsa 23
sdfasdf 20


 -------- Printing in order-------
asdfsa 23
sdfasdf 20
wordn, 10
sadfasdf 9
words, 8
dfasdf 7
asfdf, 5
wordfive, 4
wordfour, 3
sfdasdf3 2
wordthree, 2
word, 2
wordtwo, 1

That is a bit complex just to achieve that; isn't there a more refined or discrete method to approach this? Also, I noticed that word3 is before word — that's not really ideal; I would rather keep the original order if the values are equal. Thank you though, it does the job, just not what was envisioned. — Tom Snow, Jun 06 '21 at 12:21
@TomSnow - you mentioned "keywords with the same counts have the same rank regardless of their alphabetical ordering". Hence I considered only the rank comparison - not the 'Keywords'. I hope you notice that in the code. If you want to compare 'Keywords' it will get more complex. — Sudhee, Jun 06 '21 at 13:52

String with count in a file getting the top 10 counted words

Sample out3.dat.txt file

Expected Output

1 Answers1