The issue you are having is not with the loop, nor with the `eof` check.
The real issue is you have incorrect parsing logic.
Your input file is not uniform:
- Different session lines have different "MODE" in them
- Number of blank lines varies from group to group
- Blank lines may actually contain any number of space characters
- "Number of sequences" line appears in different places in different groups
To parse such a file you need a more flexible logic that will check each input line, collect all the data needed to build an output line, and only then print it to the output file.
To do this, you can use a single loop that reads one line at a time and tests its contents using the `strncmp` function.
Once you have identified the type of data the line contains, save it to a variable using the `sscanf` function.
Here is the code that will do the job:
#include <stdio.h>
#include <string.h>
/* Returns nonzero when `line` begins with the literal text `prefix`. */
static int starts_with(const char *line, const char *prefix) {
    return strncmp(line, prefix, strlen(prefix)) == 0;
}

/*
 * Writes one output line if a complete record has been collected.
 *
 * A record is complete when session, sequences and registration are all
 * positive and `given` is non-empty. `recognized` is optional: when it is
 * empty the output line simply has one field fewer.
 *
 * Returns 1 if a line was written, 0 if the record was incomplete.
 */
static int write_record(FILE *out, int session, int registration, int sequences,
                        const char *given, const char *recognized) {
    if (session <= 0 || sequences <= 0 || registration <= 0 || given[0] == '\0') {
        return 0;
    }

    if (recognized[0] != '\0') {
        fprintf(out, "%d %d %d %s %s\n", session, registration, sequences,
                given, recognized);
    } else {
        /* one less parameter to output if nothing was recognized */
        fprintf(out, "%d %d %d %s\n", session, registration, sequences, given);
    }
    return 1;
}

/*
 * Reads "summary.txt" line by line, collects the session, number of
 * sequences, registration and character fields, and writes one line per
 * collected record to "ordinated.txt".
 *
 * Returns 0 on success, 1 if the input file cannot be opened or the output
 * file cannot be closed cleanly, and -1 if the output file cannot be opened.
 */
int main(int argc, char **argv) {
    FILE *file_in, *file_out;
    char line[200];
    /* initialize these just in case we want to validate the input file */
    int current_session = 0;
    int current_sequences = 0;
    int current_registration = 0;
    /* these arrays can probably be smaller */
    char chars_given[200] = { 0 };
    char chars_recognized[200] = { 0 };

    /* command-line arguments are not used */
    (void)argc;
    (void)argv;

    file_in = fopen("summary.txt", "r");
    if (file_in == NULL) {
        perror("Error opening input file");
        return 1;
    }
    file_out = fopen("ordinated.txt", "w");
    if (file_out == NULL) {
        perror("Error opening output file");
        fclose(file_in); /* do not leave the input stream open on this path */
        return -1;
    }

    while (fgets(line, sizeof line, file_in) != NULL) {
        /* Identify the line by its leading keyword, then extract its value.
           If a conversion fails, sscanf leaves the previous value untouched. */
        if (starts_with(line, "session")) {
            sscanf(line, "session %d", &current_session);
        } else if (starts_with(line, "number of sequences")) {
            sscanf(line, "number of sequences: %d", &current_sequences);
        } else if (starts_with(line, "registration")) {
            sscanf(line, "registration %d", &current_registration);
        } else if (starts_with(line, "characters given")) {
            /* field width keeps the copy within the 200-byte buffer */
            sscanf(line, "characters given: %199s", chars_given);
        } else if (starts_with(line, "characters recognized")) {
            sscanf(line, "characters recognized: %199s", chars_recognized);
        } else if (write_record(file_out, current_session, current_registration,
                                current_sequences, chars_given, chars_recognized)) {
            /* This was a line with no information (blank or separator) and a
               complete record had been collected, so it has been printed.
               Reset the per-registration fields; otherwise the same output
               line would repeat. Session and sequence count carry over. */
            current_registration = 0;
            chars_given[0] = '\0';
            chars_recognized[0] = '\0';
        }
    }

    /* the last block may not be printed in the loop if there is no empty line after it */
    write_record(file_out, current_session, current_registration,
                 current_sequences, chars_given, chars_recognized);

    fclose(file_in);
    /* fclose flushes buffered output, so a failure here means data was lost */
    if (fclose(file_out) != 0) {
        perror("Error closing output file");
        return 1;
    }
    return 0;
}
This code is a bit ugly, but I tried to keep it simple.
It can be cleaned up by using structures, some flags, and moving some of the code to separate functions.
Edit: this code omits sanity checks for simplicity and assumes the input file is not corrupt, i.e. the first non-empty line is always a session line, every line contains all the information it should, etc.