0

I trying to write a program that gets input of up to 10 .txt files containing lines of student names and their grades. The program is supposed to create a new process for each input file and write to each student's name and average grade to a temporary file named after the pid of the process.

Eventually, the parent should wait for all processes and create a new process to combine all temporary files into a combined output file. I had struggled to pass over the pids correctly down to the output file creation so I always get only one file written successfully and the other not (when running on two input files).

Basically one of the pids is missing each time therefore my program tries to read an inexistent file. I struggling to understand why this happens and of a possible fix. Any help will be appreciated.

Program:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#define GRADES_FILE "all_std.log"

typedef struct {
    char name[11];
    int grades[256];
    int num_grades;
} student_t;

void report_data_summary(int num_stud) {
    fprintf(stderr, "grade calculation for %d students is done\n", num_stud);
}

void calculate_students_grade_average_from_input(int file_number, char* filenames[])
{
    student_t students[256];
    int num_students = 0;
    char filename[50];
    sprintf(filename, "%d.temp", getpid());

    FILE* fp_in = fopen(filenames[file_number], "r");
    if (fp_in == NULL) {
        printf("Could not open file %s\n", filenames[file_number]);
        exit(EXIT_FAILURE);
    }

    while (!feof(fp_in)) {
        fscanf(fp_in, "%s", students[num_students].name);
        int grade;
        students[num_students].num_grades = 0;
        while (fscanf(fp_in, "%d", &grade) == 1) {
            students[num_students].grades[students[num_students].num_grades] = grade;
            students[num_students].num_grades++;
        }
        num_students++;
    }
    fclose(fp_in);

    FILE* fp_out = fopen(filename, "w");
    if (fp_out == NULL) {
        printf("Could not open output file %s\n", filename);
        return;
    }

    for (int j = 0; j < num_students; ++j) {
        int sum = 0;
        for (int k = 0; k < students[j].num_grades; ++k) {
            sum += students[j].grades[k];
        }
        float avg = (float)sum / students[j].num_grades;
        fprintf(fp_out, "%s %.1f\n", students[j].name, avg);
    }
    fclose(fp_out);

    fprintf(stderr, "process: %d file: %s number of students: %d\n", getpid(), filenames[file_number], num_students);
}


void create_output(int file_count, pid_t* temp_processes[])
{
    int total_students = 0;
    pid_t pid = fork();

    if (pid < 0) {
        perror("fork failed");
        exit(EXIT_FAILURE);
    }
    else if (pid == 0) {
        FILE* fp_final = fopen(GRADES_FILE, "w");
        if (fp_final == NULL) {
            perror("Could not open final output file");
            exit(EXIT_FAILURE);
        }

        for (int i = 0; i < file_count; ++i) {
            char temp_filename[16];
            sprintf(temp_filename, "%d.temp", temp_processes[i]);
            
            FILE* fp_temp = fopen(temp_filename, "r");
            if (fp_temp == NULL) {
                printf("Could not open temporary file %s\n",temp_filename);
                return;
            }

            char line[101];
            while (fgets(line, sizeof(line), fp_temp)) {
                fputs(line, fp_final);
                total_students++;
            }

            report_data_summary(total_students);
            fclose(fp_temp);
        }

        fclose(fp_final);
        return;
    }
    else {
        wait(NULL);
    }
}




void ex01(int file_count, char* filenames[])
{
    pid_t temp_processes[10];
    pid_t pid;

    for (int i = 1; i < file_count; ++i) {
        pid = fork();

        if (pid < 0) {
            perror("fork failed");
            exit(EXIT_FAILURE);
        }
        else if (pid > 0) {
            temp_processes[i-1] = pid;
            printf("%d = %d\n",0,temp_processes[0]);
            printf("%d = %d\nend\n",1,temp_processes[1]);
        }
        else {
            calculate_students_grade_average_from_input(i, filenames);
            return;
        }
    }

    for (int i = 1; i < file_count; ++i) {
        wait(NULL);
    }
    

    if (pid > 0)
    {
        create_output(file_count - 1, temp_processes);
    }
}

int main(int argc, char* argv[]) {
    ex01(argc, argv);
    return 0;
}

Input:

file1.txt:
Abraham 80 90 75
Benny 90
Garland 70 9   90 100

file2.txt:
Dana 90 95
Ron 100 80 90

Output file:

Abraham 81.7
Benny 90.0
Garland 67.2

Output missing the second file's students.

Console output:

0 = 35222
1 = 0
end
0 = 35222
1 = 35223
end
process: 35223 file: gr_2.txt number of students: 2
process: 35222 file: gr_1.txt number of students: 3
grade calculation for 3 students is done
Could not open temporary file 0.temp
SiiilverSurfer
  • 999
  • 1
  • 6
  • 12
  • 3
    The `fork` function creates a brand new and independent *process*, not a thread. Also remember that the processes are really independent from each other, with their own virtual memory map. There's no shared memory between the processes, of you expect it. – Some programmer dude Jul 23 '23 at 16:34
  • I know, I'm trying to bridge that gap by using the temporary files and saving their names to eventually use in the main process. But this name delivering is failing somewhere and I can't find it. – SiiilverSurfer Jul 23 '23 at 16:37
  • 4
    The line `while (!feof(fp_in))` is wrong. I suggest that you read this: [Why is “while( !feof(file) )” always wrong?](https://stackoverflow.com/q/5431941/12149471) – Andreas Wenzel Jul 23 '23 at 16:48
  • 1
    I suggest you do some [*rubber duck debugging*](https://en.wikipedia.org/wiki/Rubber_duck_debugging) of your code. All processe uses *one* single file for its output, which is not shared between the processes. When one process opens it for writing, the current contents of the file will be lost. – Some programmer dude Jul 23 '23 at 16:49
  • @Someprogrammerdude I understand that but I think the problem occurs beforehand. The writing to file only happens in `create_output` but it is first trying to read from an inexistent file (wrong file name) and that's where it fails. The mechanism of passing down the pids is wrong. – SiiilverSurfer Jul 23 '23 at 17:16
  • In the line `sprintf(filename, "%d.temp", getpid());`, you seem to be assuming that the return type of `getpid()` (which is `pid_t`) is equivalent to `int`. This assumption appears dangerous to me. I believe `sprintf(filename, "%jd.temp", (intmax_t)getpid());` would be safer. Note that [`intmax_t`](https://en.cppreference.com/w/c/types/integer) requires `#include `. – Andreas Wenzel Jul 23 '23 at 17:31
  • 1
    The output indicates that the name of the file that could not be opened was `0.temp`, but your temp files are named with pids, not indexes. 0 is not a valid pid. – John Bollinger Jul 23 '23 at 17:39
  • I presume you're just experimenting with `fork()` - which is an interesting subject - but your use case here is pathalogically inappropriate for `fork()`. You create a subprocess to read each individual file ... and then read each of the subprocesses' output *files* sequentially by the main program? This simply isn't a very good multiprocessing use case. – erik258 Jul 23 '23 at 19:53
  • Side note: I suggest that you change `ex01(argc, argv);` to `ex01(argc-1,argv+1);`. That way, `file_count` and `filenames` would have the same meaning in all functions. For example, you could then use the same loop `for (int i = 0; i < file_count; ++i) {` in the functions `create_output` and `ex01`, instead of using `for (int i = 1; i < file_count; ++i) {` in one function and `for (int i = 0; i < file_count; ++i) {` in the other function. – Andreas Wenzel Jul 23 '23 at 21:59
  • This is not a multithreaded program. Please fix your title, your tags, and your expectations. – user207421 Jul 24 '23 at 01:02

1 Answers1

2

In the line

void create_output(int file_count, pid_t* temp_processes[])

you defined the second argument of the function to be of type pid_t **. However, in the function ex01, you are calling the function create_output like this:

create_output(file_count - 1, temp_processes);

The expression temp_processes will decay to &temp_processes[0], which is of type pid_t*. Therefore, your program is invoking undefined behavior, because the types of the second parameter do not match.

Your compiler should be warning you about this. This is the warning I get from gcc:

<source>: In function 'ex01':
<source>:142:39: warning: passing argument 2 of 'create_output' from incompatible pointer type [-Wincompatible-pointer-types]
  142 |         create_output(file_count - 1, temp_processes);
      |                                       ^~~~~~~~~~~~~~
      |                                       |
      |                                       pid_t * {aka int *}
<source>:65:43: note: expected 'pid_t **' {aka 'int **'} but argument is of type 'pid_t *' {aka 'int *'}
   65 | void create_output(int file_count, pid_t* temp_processes[])
      |                                    ~~~~~~~^~~~~~~~~~~~~~~~

I suggest that you change the line

void create_output(int file_count, pid_t* temp_processes[])

to:

void create_output(int file_count, pid_t temp_processes[])
Andreas Wenzel
  • 22,760
  • 4
  • 24
  • 39
  • I just tried this before I came here to see you suggesting the same thing and of course, the program works as expected now. Thank you so much! – SiiilverSurfer Jul 24 '23 at 09:41