Reading words separately from file

Question

I'm trying to make a program that scans a file containing words line by line and removes words that are spelled the same if you read them backwards (palindromes)

This is the program.c file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

int main(int argc, char **argv)
{
if(argc != 3)
{
   printf("Wrong parameters");
   return 0;
}
FILE *data;
FILE *result;
char *StringFromFile = (char*)malloc(255);
char *word = (char*)malloc(255);

const char *dat = argv[1];
const char *res = argv[2];

data = fopen(dat, "r");
result =fopen(res, "w");

while(fgets(StringFromFile, 255, data))
{
    function1(StringFromFile, word);
    fputs(StringFromFile, result);
}
free(StringFromFile);
free (word);
fclose(data);
fclose(result);
return 0;
}

This is the header.h file:

#ifndef HEADER_H_INCLUDEC
#define HEADER_H_INCLUDED

void function1(char *StringFromFile, char *word);
void moving(char *StringFromFile, int *index, int StringLength, int WordLength);

#endif

This is the function file:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

void function1(char *StringFromFile, char *word)
{
int StringLength = strlen(StringFromFile);
int WordLength;
int i;
int p;
int k;
int t;
int m;
int match;
for(i = 0; i < StringLength; i++)
{   k=0;
    t=0;
    m=i;
if (StringFromFile[i] != ' ')
{   while (StringFromFile[i] != ' ')
    {
        word[k]=StringFromFile[i];
        k=k+1;
        i=i+1;
    }
//printf("%s\n", word);
WordLength = strlen(word)-1;
p = WordLength-1;
match=0;
while (t <= p)
{
    if (word[t] == word[p])
        {
            match=match+1;
        }
    t=t+1;
    p=p-1;
}
if ((match*2) >= (WordLength))
{
    moving(StringFromFile, &m, StringLength, WordLength);
}
}
}

}

void moving(char *StringFromFile, int *index, int StringLength, int WordLength)
{   int i;
    int q=WordLength-1;
    for(i = *index; i < StringLength; i++)
{
    StringFromFile[i-1] = StringFromFile[i+q];
}
*(index) = *(index)-1;
}

It doesn't read each word correctly, though.

This is the data file:

abcba rttt plllp
aaaaaaaaaaaa
ababa
abbbba
kede

These are the separate words the program reads:

abcba
rttta
plllp
aaaaaaaaaaaa
ababa
abbbba
kede

This is the result file:

abcba rtttp



kede

It works fine if there is only one word in a single line, but it messes up when there are multiple words. Any help is appreciated.

@Fefux that is the question: there are three words on the line containing `abcba`. — Weather Vane, Nov 30 '16 at 17:05
I'm not sure if you're counting the first line as three words that each need to be removed if they're each palindromes, but it seems like your program is reading the whole line and seeing that the line altogether is not a palindrome. However, I'm not sure why it's printing the way it is. — Jon, Nov 30 '16 at 17:20
Use `strtok` to tokenize the words in each line. You can use `space`, `tab` and any other word separator to split the line into words with `strtok`. Then it is just a matter of operating on each word. As for the check, you can either do as you are, or just set a pointer to the start and end char in each word and then check with `while (s < e && *s == *e) s++, e--;` (where `s` and `e` are the start and end pointers) — David C. Rankin, Nov 30 '16 at 17:20

score 0 · Answer 1 · answered Nov 30 '16 at 20:38

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "header.h"

# define MAX 255

int Find_Number_Words_in_Line( char str[MAX] )
{
   char *ptr;
   int count = 0;
   int j;

   /* advance character pointer ptr until end of str[MAX] */
   /* everytime you see the space character, increase count */
   /* might not always work, you'll need to handle multiple space characters before/between/after words */

   ptr = str;
   for ( j = 0; j < MAX; j++ )
   {
      if ( *ptr == ' ' )
         count++;
      else if (( *ptr == '\0' ) || ( *ptr == '\n' ))
         break;

      ptr++;
   }

   return count;
}

void Extract_Word_From_Line_Based_on_Position( char line[MAX], char word[MAX], const int position )
{
   char *ptr;

   /* move pointer down line[], counting past the number of spaces specified by position */
   /* then copy the next word from line[] into word[] */
}


int Is_Palindrome ( char str[MAX] )
{
   /* check if str[] is a palindrome, if so return 1, else return 0 */
}


int main(int argc, char **argv)
{
   FILE *data_file;
   FILE *result_file;
   char *line_from_data_file = (char*)malloc(MAX);
   char *word = (char*)malloc(MAX);
   const char *dat = argv[1];
   const char *res = argv[2];
   int j, n;

   if (argc != 3)
   {
      printf("Wrong parameters");
      return 0;
   }

   data_file = fopen(dat, "r");
   result_file = fopen(res, "w");

   fgets( line_from_data_file, MAX, data_file );
   while ( ! feof( data_file ) )
   {
       /*
          fgets returns everything up to newline character from data_file,
          function1 in original context would only run once for each line read
          from data_file, so you would only get the first word

             function1( line_from_data_file, word );
             fputs( word, result_file );
             fgets( line_from_data_file, MAX, data_file );

          instead try below, you will need to write the code for these new functions
          don't be afraid to name functions in basic English for what they are meant to do
          make your code more easily readable
       */

      n = Find_Number_Words_in_Line( line_from_data_file );
      for ( j = 0; j < n; j++ )
      {
         Extract_Word_From_Line_Based_on_Position( line_from_data_file, word, n );
         if ( Is_Palindrome( word ) )
            fputs( word, result_file );  /* this will put one palindrome per line in result file */
      }

      fgets( line_from_data_file, MAX, data_file );
   }
   free( line_from_data_file );
   free( word );

   fclose( data_file );
   fclose( result_file );

   return 0;
}

Do *NOT* cast the return of `malloc`, it is unnecessary. See: [**Do I cast the result of malloc?**](http://stackoverflow.com/q/605845/995714) for thorough explanation. Why dynamically allocate at all, why not: `char line_from_data_file[MAX] = "";` Why are you using `feof`? — David C. Rankin, Nov 30 '16 at 20:50
i did not correct everything he had 100%, i was typing from the hip trying to keep most of what he had and show the basic logic fault of his call to `function1`. Feel free to improve on what i did. — ron, Nov 30 '16 at 21:11
No, that's fine, I was just curious. Generally, you would simply read all lines from the file with a `while (fgets (line_from_data_file, MAX, data_file)) {... process all here ...}` type loop, so the `feof` was a bit odd. — David C. Rankin, Nov 30 '16 at 21:50
to this day i still use feof with the while loop structure i typed. For anyone it is much easier to read and see the logic, versus loading everything up on one line which can be cryptic and confusing to non experienced programmers. My perspective is: understand the entire problem, break it apart to basic functions, then tackle each function, and verify each function operates correctly. Combining multiple operations into one function or one loop is usually where the problems arise, and i think the next answer posted proves this. — ron, Dec 01 '16 at 18:26

score 0 · Answer 2 · answered Dec 01 '16 at 03:35

To follow up from the comments, you may be overthinking the problem a bit. To check whether each word in each line of a file is a palindrome, you have a 2 part problem. (1) reading each line (fgets is fine), and (2) breaking each line into individual words (tokens) so that you can test whether each token is a palindrome.

When reading each line with fgets, a simple while loop conditioned on the return of fgets will do. e.g., with a buffer buf of sufficient size (MAXC chars), and FILE * stream fp open for reading, you can do:

while (fgets (buf, MAXC, fp)) { /* read each line */
    ...                         /* process line */
}

(you can test the length of the line read into buf is less than MAXC chars to insure you read the complete line, if not, any unread chars will be placed in buf on the next loop iteration. This check, and how you want to handle it, is left for you.)

Once you have your line read, you can either use a simple pair of pointers (start and end pointers) to work your way through buf, or you can use strtok and let it return a pointer to the beginning of each word in the line based on the set of delimiters you pass to it. For example, to split a line into words, you probably want to use delimiters like " \t\n.,:;!?" to insure you get words alone and not words with punctuation (e.g. in the line "sit here.", you want "sit" and "here", not "here.")

Using strtok is straight forward. On the first call, you pass the name of the buffer holding the string to be tokenized and a pointer to the string containing the delimiters (e.g. strtok (buf, delims) above), then for each subsequent call (until the end of the line is reached) you use NULL as name of the buffer (e.g. strtok (NULL, delims)) You can either call it once and then loop until NULL is returned, or you can do it all using a single for loop given that for allows setting an initial condition as part of the statement, e.g., using separate calls:

char  *delims = " \t\n.,:;";    /* delimiters */
char *p = strtok (buf, delims); /* first call to strtok */

while ((p = strtok (NULL, delims))) {  /* all subsequent calls */
    ... /* check for palindrome */
}

Or you can simply make the initial call and all subsequent calls in a for loop:

/* same thing in a single 'for' statement */
for (p = strtok (buf, delims); p; p = strtok (NULL, delims)) {
    ... /* check for palindrome */
}

Now you are to the point you need to check for palindromes. That is a fairly easy process. Find the length of the token, then either using string indexes, or simply using a pointer to the first and last character, work from the ends to the middle of each token making sure the characters match. On the first mismatch, you know the token is not a palindrome. I find a start and end pointer just as easy as manipulating sting indexes, e.g. with the token in s:

char *ispalindrome (char *s)    /* function to check palindrome */
{
    char *p = s,                   /* start pointer */
        *ep = s + strlen (s) - 1;  /* end pointer  */

    for ( ; p < ep; p++, ep--)  /* work from end to middle */
        if (*p != *ep)          /* if chars !=, not palindrome */
            return NULL;

    return s;
}

If you put all the pieces together, you can do something like the following:

#include <stdio.h>
#include <string.h>

enum { MAXC = 256 };    /* max chars for line buffer */

char *ispalindrome (char *s);

int main (int argc, char **argv) {

    char buf[MAXC] = "",        /* line buffer */
         *delims = " \t\n.,:;"; /* delimiters */
    unsigned ndx = 0;           /* line index */
    FILE *fp = argc > 1 ? fopen (argv[1], "r") : stdin;

    if (!fp) {  /* validate file open for reading */
        fprintf (stderr, "error: file open failed '%s'.\n", argv[1]);
        return 1;
    }

    while (fgets (buf, MAXC, fp)) { /* read each line */
        char *p = buf;   /* pointer to pass to strtok */
        printf ("\n line[%2u]:  %s\n tokens:\n", ndx++, buf);
        for (p = strtok (buf, delims); p; p = strtok (NULL, delims))
            if (ispalindrome (p))
                printf ("  %-16s  - palindrome\n", p);
            else
                printf ("  %-16s  - not palindrome\n", p);
    }
    if (fp != stdin) fclose (fp);

    return 0;
}

char *ispalindrome (char *s)    /* function to check palindrome */
{
    char *p = s, *ep = s + strlen (s) - 1;  /* ptr & end-ptr */

    for ( ; p < ep; p++, ep--)  /* work from end to middle */
        if (*p != *ep)          /* if chars !=, not palindrome */
            return NULL;

    return s;
}

Example Input

$ cat dat/palins.txt
abcba rttt plllp
aaaaaaaaaaaa
ababa
abbbba
kede

Example Use/Output

$ ./bin/palindrome <dat/palins.txt

 line[ 0]:  abcba rttt plllp

 tokens:
  abcba             - palindrome
  rttt              - not palindrome
  plllp             - palindrome

 line[ 1]:  aaaaaaaaaaaa

 tokens:
  aaaaaaaaaaaa      - palindrome

 line[ 2]:  ababa

 tokens:
  ababa             - palindrome

 line[ 3]:  abbbba

 tokens:
  abbbba            - palindrome

 line[ 4]:  kede

 tokens:
  kede              - not palindrome

Look things over and think about what it taking place. As mentioned above, insuring you have read a complete line in each call with fgets should be validated, that is left to you. (but with this input file -- of course it will) If you have any questions, let me know and I'll be happy to help further.

Thank you, but the problem is that I have to remove them from the file. The answer has to be: rttt, kede. — user6952624, Dec 01 '16 at 08:11
That is the simple part, if the check for `ispalindrome` is `false`, just do whatever you need to do with it. This was simply to show you how to handle the determination. What you do with that determination is completely up to you. If you need help with that, just ask, but that should be something you should try first. — David C. Rankin, Dec 04 '16 at 04:33

Reading words separately from file

2 Answers2