0

I'm doing an exercise for a course on Operating Systems. To learn to use signals (one of my weak points), I wanted to try to synchronize N processes using only signals, but I can't make it work.

The code is:

#include "Header.h"
#include <signal.h>

// Handler installed for SIGUSR1: announces on standard output that the
// signal arrived. The signal number itself is not inspected.
void sigHandler(int signum)
{
    (void)signum;
    fputs("Received signal\n", stdout);
}

// Handler installed for SIGUSR2: announces on standard output that the
// other signal arrived. The signal number itself is not inspected.
void sigHandler2(int signum)
{
    (void)signum;
    fputs("Received other signal\n", stdout);
}

// Intended flow: fork N children that each wait for SIGUSR1; the last child
// wakes the parent with SIGUSR2; the parent then releases the children one
// at a time, starting with the last one created, reaping each with wait().
// argc and argv are not used.
int main(int argc, char *argv[])
{
    // Number of child processes to create
    int N = 2;

    // Install the handlers before forking so the children inherit them
    signal(SIGUSR1, sigHandler);
    signal(SIGUSR2, sigHandler2);

    // PIDs of the children, in creation order
    pid_t pid[N];

    int i;

    for (i = 0; i < N; i++)
    {
        // Create a child
        pid[i] = fork();

        // NOTE(review): `pid` is the array itself, so this compares the
        // array's address with 0 and is never true. The child branch below
        // therefore never runs, and every forked process keeps looping and
        // forks again (four processes in total for N = 2). The intended
        // test is presumably `pid[i] == 0`.
        if (pid == 0)
        {
            // The last child wakes the parent
            if (i == N - 1)
            {
                kill(getppid(), SIGUSR2);
            }

            // Wait for SIGUSR1
            pause();
            printf("%i completed\n", getpid());
            exit(0);
        }
    }

    // Wait until a signal arrives (meant to be SIGUSR2 from the last child)
    pause();

    // Release the last child; i equals N after the loop, so step back first
    i--;
    kill(pid[i], SIGUSR1);
    // Re-install the SIGUSR1 handler
    signal(SIGUSR1, sigHandler);

    // Release the remaining children, one per reaped child
    while (wait(NULL) != -1)
    {
        // NOTE(review): once the child test above is corrected, this body
        // runs once more after the final child is reaped, with i == -1, and
        // reads pid[-1] -- confirm and bound the index.
        i--;
        kill(pid[i], SIGUSR1);
        signal(SIGUSR1, sigHandler);
    }
    printf("All fine\n");
    exit(0);
}

I tried replacing the pause() system call with sleep(10), but the result was four "All fine" messages on the terminal.

I suppose that the problem derives from the asynchronous nature of signals, but I don't understand how to resolve it.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
Dacqu91
  • 3
  • 3
  • Note the guidelines in the answers to [How to avoid using `printf()` in a signal handler?](https://stackoverflow.com/q/16891019/15168) In practice, it is not a part of your problem, but you should be aware of the rules. – Jonathan Leffler Jul 01 '22 at 13:44

1 Answer

0

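A major part of the problem is the test `if (pid == 0)`. Here `pid` is the array, so the test compares the array's address with null; that is never true, so the child branch never runs and every forked process carries on round the loop. You meant `if (pid[i] == 0)`!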

With that change and the minimal changes necessary to get the code to compile under my default compilation options (which are fairly fussy), I get this output from the program sig79:

$ sig79
Received other signal
Received signal
15312 completed
Received signal
15311 completed
All fine
$

While debugging the problem, I used this code, which has copious printing (partly because I was confused by the flawed test). I use a waiting loop because there are circumstances under which your parent process could have children it doesn't create — obscure circumstances, but this code accounts for those. Note that the children exit with a status corresponding to the child number; you can see those numbers in the reported exit status in the waiting loop.

#include <assert.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

// Stamp the calling process's PID over the first five characters of msg
// (the five least significant decimal digits, zero-padded on the left) and
// write len bytes of msg to standard output with write().
//
// len: number of bytes of msg to write
// msg: writable buffer whose first five characters are overwritten
static void write_msg(size_t len, char *msg)
{
    enum { PID_WIDTH = 5 };
    int remaining = getpid();
    char *slot = msg + PID_WIDTH;

    // Fill the digits from right to left, least significant first
    while (slot > msg)
    {
        *--slot = remaining % 10 + '0';
        remaining /= 10;
    }
    write(STDOUT_FILENO, msg, len);
}

// SIGUSR1 handler: reports the signal through write_msg(), which replaces
// the XXXXX placeholder with this process's PID.
static void sigHandler1(int signum)
{
    static char report[] = "XXXXX: Received signal SIGUSR1\n";

    assert(signum == SIGUSR1);
    // sizeof counts the terminating NUL; leave it out of the output
    write_msg(sizeof report - 1, report);
}

// SIGUSR2 handler: reports the signal through write_msg(), which replaces
// the XXXXX placeholder with this process's PID.
static void sigHandler2(int signum)
{
    static char report[] = "XXXXX: Received signal SIGUSR2\n";

    assert(signum == SIGUSR2);
    // sizeof counts the terminating NUL; leave it out of the output
    write_msg(sizeof report - 1, report);
}

/*
 * Start N children, wait for the last one to report in with SIGUSR2, then
 * release the children one at a time in reverse creation order by sending
 * SIGUSR1, reaping each child before releasing the next.
 *
 * Each child exits with its 1-based child number as its exit status.
 * Returns EXIT_SUCCESS once every child has been handled, or EXIT_FAILURE
 * if a fork() fails (children already started are terminated and reaped).
 *
 * NOTE(review): pause() only returns for a signal delivered while the
 * process is waiting in it. If a signal is handled before the process
 * reaches pause(), the wait is not satisfied and the process can hang.
 * Blocking the signals with sigprocmask() and waiting with sigsuspend()
 * would close that window.
 */
int main(void)
{
    // Number of child processes
    int N = 2;

    // Install the handlers before forking so every child inherits them
    signal(SIGUSR1, sigHandler1);
    signal(SIGUSR2, sigHandler2);

    printf("%d: parent at work\n", (int)getpid());
    // Flush before the first fork(): otherwise, when stdout is not a
    // terminal, the buffered line is copied into child 1 and printed twice.
    fflush(stdout);

    // PIDs of the children, indexed by creation order
    pid_t pid[N];

    for (int i = 0; i < N; i++)
    {
        // Create a child
        pid[i] = fork();

        if (pid[i] < 0)
        {
            // Never let -1 reach the kill() calls below: kill(-1, sig)
            // signals every process this user is allowed to signal.
            perror("fork");
            for (int j = 0; j < i; j++)
            {
                kill(pid[j], SIGTERM);
                waitpid(pid[j], NULL, 0);
            }
            return EXIT_FAILURE;
        }

        if (pid[i] == 0)
        {
            printf("%d: child %d at play\n", (int)getpid(), i+1);
            // The last child wakes the parent
            if (i == N - 1)
            {
                kill(getppid(), SIGUSR2);
                printf("%d: signal SIGUSR2 sent to PID %d\n", (int)getpid(), (int)getppid());
            }

            // Wait for SIGUSR1
            pause();
            printf("%d: child %d completed\n", (int)getpid(), i+1);
            exit(i+1);
        }
        printf("%d: child %d has PID %d\n", (int)getpid(), i+1, (int)pid[i]);
        // Keep the buffer empty so the next fork() does not duplicate it
        fflush(stdout);
    }

    // Wait for the last child to report that it has started
    pause();

    for (int i = N; i > 0; i--)
    {
        kill(pid[i - 1], SIGUSR1);
        printf("%d: signal SIGUSR1 sent to child %d PID %d\n", (int)getpid(), i, (int)pid[i - 1]);
        int corpse;
        int status;
        // Reap until the child just released turns up; any other corpse is
        // a child this program did not create itself.
        while ((corpse = wait(&status)) > 0)
        {
            if (corpse == pid[i - 1])
            {
                printf("%d: child %d PID %d exited with status 0x%.4X\n",
                       (int)getpid(), i, corpse, (unsigned)status);
                break;
            }
            else
            {
                printf("%d: unexpected child PID %d exited with status 0x%.4X\n",
                       (int)getpid(), corpse, (unsigned)status);
            }
        }
    }

    printf("%d: All fine\n", (int)getpid());
    return EXIT_SUCCESS;
}

One time when I ran the program (sig59), I got the output:

$ sig59
15177: parent at work
15177: child 1 has PID 15178
15178: child 1 at play
15177: child 2 has PID 15179
15179: child 2 at play
15179: signal SIGUSR2 sent to PID 15177
15177: Received signal SIGUSR2
15177: signal SIGUSR1 sent to child 2 PID 15179
15179: Received signal SIGUSR1
15179: child 2 completed
15177: child 2 PID 15179 exited with status 0x0200
15177: signal SIGUSR1 sent to child 1 PID 15178
15178: Received signal SIGUSR1
15178: child 1 completed
15177: child 1 PID 15178 exited with status 0x0100
15177: All fine
$

And, to demonstrate that the obscure circumstances can occur:

$ forker -s 0 -c 'exit 37' -- ./sig59
forker: Setting number of child processes to 3
15138: forker at work
15138: launched 15139
15138: launched 15140
15138: launched 15141
15138: executing ./sig59
15138: parent at work
15138: child 1 has PID 15142
15138: child 2 has PID 15143
15142: child 1 at play
15143: child 2 at play
15143: signal SIGUSR2 sent to PID 15138
15138: Received signal SIGUSR2
15138: signal SIGUSR1 sent to child 2 PID 15143
15143: Received signal SIGUSR1
15143: child 2 completed
15138: child 2 PID 15143 exited with status 0x0200
15138: signal SIGUSR1 sent to child 1 PID 15142
15138: unexpected child PID 15139 exited with status 0x2500
15142: Received signal SIGUSR1
15142: child 1 completed
15138: unexpected child PID 15140 exited with status 0x2500
15138: unexpected child PID 15141 exited with status 0x2500
15138: child 1 PID 15142 exited with status 0x0100
15138: All fine
$

The forker program launched three child processes, each of which ran exit 37 (hex 0x25), and then itself executed sig59. The resulting sig59 process had no clue that it had inherited children from its previous life as the forker process.

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • Thanks. The problem was the pid==0. This remained from a previous version of the exercise, and I didn't change because I am air-headed. Thanks for the help. – Dacqu91 Jul 01 '22 at 16:05
  • It caught me by surprise, but I was compiling on a machine with a somewhat older version of GCC (8.3.0) than I use at work. The `-Waddress` option (implied by `-Wall`) didn't report that the code was checking an array address against null. The compilers I use at work (11.2.0, 12.1.0, etc) do report such comparisons, GCC 12.1.0 more copiously (but accurately) than older versions. – Jonathan Leffler Jul 01 '22 at 20:36