0

I am writing a program in c that searches through source code files and counts the number of ‘C’ reserved words encountered. But the reserved word is printed only if the reserved word entered is first word. And it counts total number of strings not total no of reserved word used. Can someone help me on this. My code is so messy please don't mind that.

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#define KEYMAX 32


FILE *fp ;
char data[1024];
struct keyword
{
    char word[10];
    int occur;
};
int i = 0, j = 0, pos;
char str[100], unit[20], ch;
int stored[1024];
char delimiters[] = " \t\n\v\f\r";  /* possible space delimiters */
char *token;
struct keyword key[32] = {"auto", 0, "break", 0, "case", 0,
                          "char", 0, "const", 0, "continue", 0,
                          "default", 0, "do", 0, "double", 0,
                          "else", 0, "enum", 0, "extern", 0,
                          "float", 0, "for", 0, "goto", 0,
                          "if", 0, "int", 0, "long", 0,
                          "register", 0, "return", 0, "short", 0,
                          "signed", 0, "sizeof", 0, "static", 0,
                          "struct", 0, "switch", 0, "typedef", 0,
                          "union", 0, "unsigned", 0, "void", 0,
                          "volatile", 0, "while", 0,};

int main()
{
    takeinput();
    system("CLS");
    theresult();
    // processresult();
    // ctoken();
return (0);

}

int takeinput()    // function to write in the file
{

  printf( "**********Welcome*************" ) ;
    fp = fopen("test.c", "w") ;   // Open file in write mode.
    if ( fp == NULL )
    {
        printf( "Could not open file test.c" ) ;  // Prints the statement if the file is not able to open.
        return 1;
    }
    printf( "\nPlease enter some text from keyboard to write in the file test.c \n\t" ) ;
    // getting input from user
    while ( strlen ( gets( data ) ) > 0 )
    {
        // writing in the file
        fputs(data, fp) ;           // Writes to file
        fputs("\n", fp) ;
    }

    // closing the file
    fclose(fp) ;
    return 0;
}

int theresult()
{

   fp = fopen("test.c", "r"); // read mode

   if (fp == NULL)
   {
      perror("Error while opening the file.\n");   // Prints the statement if the file is not able to open.
      return 1;
   }
   printf("The contents of test.c file are:\n");

 // To covert the ch into str
   int i= 0;
  //  printf("-----this is from ch----\n"); (Just for reference)
   while((ch = fgetc(fp)) != EOF)
   {
    str[i]=ch;
    i++;

   //  printf("%c",ch);   prints character

   }

   printf("%s",str);

   // printf("\n----This is from token-----\n");   (just for reference)
   for (token = strtok(str, delimiters); token != NULL;
         token = strtok(NULL, delimiters)) /* 'for loop' conditional part */
        /* prints token one per line */
       // puts(token);      // prints token

    for (i = 0; i < strlen(str); i++)
    {
        while (i < strlen(str) && str[i] != ' ' && isalpha(str[i]))
        {
            unit[j++] = tolower(str[i++]);
        }
        if (j != 0)
        {
            unit[j] = '\0';
            pos = binarysearch(unit, key);
            j = 0;
            if (pos != -1)
            {
               key[pos].occur++;
            }
        }
    }
    printf("***********************\n   Keyword\tCount\n***********************\n");
    for (i = 0; i < KEYMAX; i++)
    {
        if (key[i].occur)
        {
            printf("  %s\t  %d\n", key[i].word, key[i].occur);       // Prints the reserved keyword and its occurance
        }
    }

    fclose(fp);
   return (0);
}



int binarysearch(char *word, struct keyword key[])
{
    int low, high, mid;

    low = 0;
    high = KEYMAX - 1;
    while (low <= high)
    {
        mid = (low + high) / 2;
        if (strcmp(word, key[mid].word) < 0)
        {
            high = mid - 1;
        }
        else if (strcmp(word, key[mid].word) > 0)
        {
            low = mid + 1;
        }
        else
        {
            return mid;
        }
    }
    return -1;
}

The string entered is: if i break please re-join it. float float


Keyword Count


if            1 
break     1
Float     2
  • 1
    The provided example looks fine to me. What am I missing? – Support Ukraine Apr 29 '19 at 10:53
  • 1
    OT: [don't use gets](https://stackoverflow.com/questions/1694036/why-is-the-gets-function-so-dangerous-that-it-should-not-be-used) – Jabberwocky Apr 29 '19 at 11:23
  • The posted code does not compile!. Amongst other things, it is missing the needed prototypes for the sub functions. Suggest enabling the warnings on your compiler, then fixing those warnings. ( for `gcc`, at a minimum use: `-Wall -Wextra -Wconversion -pedantic -std=gnu11` ) Note: other compilers use different options to produce the same results. Using the listed options causes the compiler to output 15 warnings, many of which are quite serious – user3629249 Apr 30 '19 at 00:21
  • OT: for ease of readability and understanding: 1) consistently indent the code (no random 2 character indents) Suggest each indent level be 4 spaces. – user3629249 Apr 30 '19 at 00:22
  • When encountering (for instance) a `if`, there may or may not be a following space, they could be a `left paren` Such details need to be taken into account – user3629249 Apr 30 '19 at 00:23
  • Please be consistent: in `main()` there is: `takeinput();` however, the actual function signature is: `int takeinput()` rather than `void takeinput()` – user3629249 Apr 30 '19 at 00:25
  • when a function takes no parameters, then the prototype should have `void` between the parens, but not in the actual function signature – user3629249 Apr 30 '19 at 00:26
  • regarding: `while ( strlen ( gets( data ) ) > 0 )` The function: `gets()` has been depreciated for years and completely removed since (about) 1999. Your compiler should have told you about this problem – user3629249 Apr 30 '19 at 00:28
  • Please be consistent: in main() there is: `theresult();` however, the actual function signature is: `int theresult()` rather than `void theresult()` – user3629249 Apr 30 '19 at 00:30
  • OT: regarding: ` printf( "Could not open file test.c" ) ; return 1;` Error messages should be output to `stderr` rather than `stdout` and when the error indication is from a C library function, should also output the text reason the system thinks the error occurred. The function: `perror( "Could not open file test.c" );` correctly performs all of that. The preferred method of exiting after an error is: `exit( EXIT_FAILURE );` – user3629249 Apr 30 '19 at 00:33
  • Rather than calling `gets( data )` a much better option is: `fgets( data, sizeof( data ), stdin )` Then since `fgets()` also inputs the trailing '\n' the next statement (to remove that newline) should be: `data[ strspn( data, "\n" ) ] = '\0';` However, since you want the '\n' in the temporary file, best to not remove it – user3629249 Apr 30 '19 at 00:40
  • OT: rather than the extra calls to `fopen()` and `fclose()` suggest just calling `rewind( fp );` – user3629249 Apr 30 '19 at 00:43
  • regarding: `char str[100],` and `while((ch = fgetc(fp)) != EOF) { str[i]=ch; i++; }` The file is very likely to be longer than 100 bytes, When it is longer than 100 bytes, then the buffer `str[]` will overflow, resulting in undefined behavior and can lead to a seg fault event – user3629249 Apr 30 '19 at 00:46
  • regarding: `for (token = strtok(str, delimiters); token != NULL; token = strtok(NULL, delimiters))` This overlays the initial value for `token` with a different value. Suggest rewriting this function to something like: `token = strtok( str , delimiters ); While( token ) { ....process token here.... token = strtok( NULL, delimiters ); }` – user3629249 Apr 30 '19 at 00:51
  • OT: the function: `tolower()` handles non alphabetical characters correctly, so no need to call: `isalpha()` – user3629249 Apr 30 '19 at 00:54
  • regarding: `if i break please re-join it. float float` is not a valid code statement. Suggest using your source code as the input to your executable, similiar to `mycode < mycode.c` – user3629249 Apr 30 '19 at 01:03
  • regarding: `while (i < strlen(str) && str[i] != ' ' && isalpha(str[i])) { unit[j++] = tolower(str[i++]); }` Strongly suggest completely removing this code block. It is accomplishing nothing and all C keywords are already lower case – user3629249 Apr 30 '19 at 01:06

1 Answers1

1

The error is in function theresult. In the for loop for tokenizing the input you process and search a word from the whole input str instead of the word token returned from strtok. You would not have to check for space (' ') after tokenizing because space is part of the delimiters.

It seems to work for me after changing the loop to:

   for (token = strtok(str, delimiters); token != NULL;
         token = strtok(NULL, delimiters)) /* 'for loop' conditional part */
        /* prints token one per line */
       // puts(token);      // prints token

        for (i = 0; i < strlen(token); i++)
        {
            while (i < strlen(token) && token[i] != ' ' && isalpha(token[i]))
            {
                unit[j++] = tolower(token[i++]);
            }
            if (j != 0)
            {
                unit[j] = '\0';
                pos = binarysearch(unit, key);
                j = 0;
                if (pos != -1)
                {
                   key[pos].occur++;
                }
            }
        }

The output is

The contents of test.c file are:
if i break please re-join it. float float
***********************
   Keyword      Count
***********************
  break   1
  float   2
  if      1

Additional remarks:

Instead of reading the whole input file into str I suggest to read and process the input line by line using fgets and a loop.

If you want the user to enter the input text you could process the input lines directly instead of writing them to a file "test.c" first and then reading the file.

The while loop after tokenizing will cut off everything from the first non-alpha character. Maybe you implemented this because it did not work due to the original error. When the token is "re-join", it will search for "re". You should check if this is what you want and change the while loop if necessary.

Bodo
  • 9,287
  • 1
  • 13
  • 29