As noted in a comment by Barmar, the fundamental problem is that you are receiving SIGCHLD signals as well as the signals generated by the child processes calling `kill()`.
Tangentially, note that the children will all send the same signal to the parent — you don't call `srand()` in the children with different seeds, so they all get the same value from `rand()`.
Generally, you should prefer `sigaction()` over `signal()`. One reason to prefer `sigaction()` is that the handler is not automatically reset to the default, eliminating a timing problem.
You get SIGCHLD signals from the dying children, as well as one of SIGHUP, SIGINT or SIGQUIT from the child calling `kill()`. Your use of `sigprocmask()` prevents signals from being delivered except when `sigsuspend()` is called. You can get chained signals — one SIGINT and one SIGCHLD can be pending, and two separate calls to the signal handler occur, leading to the larger than expected signal count.
The code shown below takes due note of "How to avoid using `printf()` in a signal handler?" and uses `write()` to report a limited amount of information. POSIX permits the use of `write()` in a signal handler; the C standard doesn't (in part because it doesn't recognize `write()` as a standard function, but mainly because it is very stringent about what can happen in a signal handler).
The code tests `sigfillset()` and `sigemptyset()` because they are macros on macOS with a comma operator, the RHS of which is simply `0`. With my default compilation options, GCC complains about the unused value. So, the tests use the returned value, even though it's always zero.
Note that I ran the tests on a Mac running macOS rather than Linux. However, the general behaviour of the two systems will probably be very similar.
Here is a minimal adaptation of your code, adding signal reporting to the signal handler and printing before and after `sigsuspend()` (source code `sig17.c`):
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/* Number of times the handler has run; written only from h(). */
static volatile sig_atomic_t s = 0;

/* Report template; the "XX" at offsets 7 and 8 is overwritten per signal. */
static char message[] = "Signal XX received\n";

/*
 * Signal handler: count the delivery and report the signal number.
 *
 * n is the number of the signal being delivered.  The handler re-installs
 * itself for that signal, bumps the counter s, patches the two-digit
 * decimal signal number into message and emits it on standard error with
 * write().  Only two digits are produced, so n is expected to be below 100.
 */
static void h(int n)
{
    const int tens = n / 10;
    const int units = n % 10;

    /* Re-arm first, in case the disposition was reset on delivery. */
    signal(n, h);
    ++s;

    message[7] = (char)('0' + tens);
    message[8] = (char)('0' + units);

    /* Drop the terminating NUL from the byte count. */
    write(STDERR_FILENO, message, sizeof(message) - 1);
}
static void err_error(const char *fmt, ...);
/*
 * Fork the requested number of children, one at a time, and count the
 * signals seen by the parent's handler.
 *
 * h() is installed for every signal number and all signals are then
 * blocked with sigprocmask(), so pending signals are delivered only while
 * the parent waits in sigsuspend() with an empty mask.  Each child sends
 * one of signals 1..3 to the parent and terminates.  After the loop the
 * parent reaps every child and prints the handler invocation count s.
 *
 * argv[1] is the number of children to create (parsed with atoi()).
 * Returns 0; usage errors and system-call failures exit via err_error().
 */
int main(int argc, char *argv[])
{
    sigset_t ms;
    int n;

    if (argc != 2)
        err_error("Usage: %s num-children\n", argv[0]);

    /* Parse the child count once instead of on every loop iteration. */
    const int num_children = atoi(argv[1]);

    /* Try every signal number; those that cannot be caught simply fail. */
    for (n = 1; n < NSIG; ++n)
        signal(n, h);

    /* Block everything so that delivery happens only inside sigsuspend(). */
    if (sigfillset(&ms) != 0)
        err_error("sigfillset() failed\n");
    if (sigprocmask(SIG_SETMASK, &ms, NULL) != 0)
        err_error("sigprocmask() failed\n");

    /* From here on ms is the empty mask handed to sigsuspend(). */
    if (sigemptyset(&ms) != 0)
        err_error("sigemptyset() failed\n");

    // first part
    for (n = 1; n <= num_children; ++n)
    {
        pid_t pid = fork();

        if (pid < 0)
            err_error("fork() failed\n");
        else if (pid != 0)
        {
            printf("%d: Started %d\n", n, (int)pid);
            sigsuspend(&ms);
            printf("%d: Signalled!\n", n);
        }
        else
        {
            kill(getppid(), 1 + rand() % 3);
            /*
             * Use _exit(), not exit(): the child holds a copy of the
             * parent's stdio buffers, and exit() would flush them again,
             * duplicating output when stdout is not line-buffered.
             */
            _exit(0);
        }
    }

    // the kill part
    pid_t corpse;
    int status;

    while ((corpse = wait(&status)) != -1)
        printf("Dead: %5d - 0x%.4X\n", (int)corpse, (unsigned)status);

    printf("%d\n", (int)s);
    return 0;
}
/*
 * Report a fatal error and terminate the process.
 *
 * fmt and the following arguments are interpreted printf-style and written
 * to stderr; the process then exits with status 1.  Does not return.
 */
static void err_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(1);
}
On one of the many runs of this code (with 5 children specified), I got the output:
1: Started 26778
Signal 02 received
1: Signalled!
2: Started 26779
Signal 20 received
2: Signalled!
3: Started 26780
Signal 02 received
3: Signalled!
4: Started 26781
Signal 20 received
Signal 02 received
4: Signalled!
5: Started 26782
Signal 20 received
Signal 02 received
5: Signalled!
Dead: 26780 - 0x0000
Dead: 26779 - 0x0000
Dead: 26778 - 0x0000
Dead: 26781 - 0x0000
Dead: 26782 - 0x0000
7
As you can see, the generated signal is always 2 (SIGINT); signal 20 is SIGCHLD. In this example, the program caught 4 of the 5 SIGINT signals and 3 of the 5 SIGCHLD signals. Note that sometimes two signal handlers were called because both a SIGINT and a SIGCHLD signal were pending.
The `sigprocmask()` call ensures no signals are delivered asynchronously. If that call is removed, then the code detects 10 signals (source code `sig19.c`):
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/* Number of times the handler has run; written only from h(). */
static volatile sig_atomic_t s = 0;

/* Report template; the "XX" at offsets 7 and 8 is overwritten per signal. */
static char message[] = "Signal XX received\n";

/*
 * Signal handler: count the delivery and report the signal number.
 *
 * n is the number of the signal being delivered.  The handler re-installs
 * itself for that signal, bumps the counter s, patches the two-digit
 * decimal signal number into message and emits it on standard error with
 * write().  Only two digits are produced, so n is expected to be below 100.
 */
static void h(int n)
{
    const int tens = n / 10;
    const int units = n % 10;

    /* Re-arm first, in case the disposition was reset on delivery. */
    signal(n, h);
    ++s;

    message[7] = (char)('0' + tens);
    message[8] = (char)('0' + units);

    /* Drop the terminating NUL from the byte count. */
    write(STDERR_FILENO, message, sizeof(message) - 1);
}
static void err_error(const char *fmt, ...);
/*
 * Fork the requested number of children, one at a time, and count the
 * signals seen by the parent's handler.
 *
 * h() is installed for every signal number, and no signal mask is set, so
 * the handler may run at any point in the parent and not only while it
 * waits in sigsuspend().  Each child sends one of signals 1..3 to the
 * parent and terminates.  After the loop the parent reaps every child and
 * prints the handler invocation count s.
 *
 * argv[1] is the number of children to create (parsed with atoi()).
 * Returns 0; usage errors and system-call failures exit via err_error().
 */
int main(int argc, char *argv[])
{
    sigset_t ms;
    int n;

    if (argc != 2)
        err_error("Usage: %s num-children\n", argv[0]);

    /* Parse the child count once instead of on every loop iteration. */
    const int num_children = atoi(argv[1]);

    /* Try every signal number; those that cannot be caught simply fail. */
    for (n = 1; n < NSIG; ++n)
        signal(n, h);

    /* ms is the empty mask handed to sigsuspend(). */
    if (sigemptyset(&ms) != 0)
        err_error("sigemptyset() failed\n");

    for (n = 1; n <= num_children; ++n)
    {
        pid_t pid = fork();

        if (pid < 0)
            err_error("fork() failed\n");
        else if (pid != 0)
        {
            printf("%d: Started %d\n", n, (int)pid);
            sigsuspend(&ms);
            printf("%d: Signalled!\n", n);
        }
        else
        {
            kill(getppid(), 1 + rand() % 3);
            /*
             * Use _exit(), not exit(): the child holds a copy of the
             * parent's stdio buffers, and exit() would flush them again,
             * duplicating output when stdout is not line-buffered.
             */
            _exit(0);
        }
    }

    pid_t corpse;
    int status;

    while ((corpse = wait(&status)) != -1)
        printf("Dead: %5d - 0x%.4X\n", (int)corpse, (unsigned)status);

    printf("%d\n", (int)s);
    return 0;
}
/*
 * Report a fatal error and terminate the process.
 *
 * fmt and the following arguments are interpreted printf-style and written
 * to stderr; the process then exits with status 1.  Does not return.
 */
static void err_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(1);
}
Sample output:
1: Started 26857
Signal 02 received
1: Signalled!
Signal 20 received
2: Started 26858
Signal 02 received
2: Signalled!
Signal 20 received
3: Started 26859
Signal 02 received
3: Signalled!
Signal 20 received
4: Started 26860
Signal 02 received
4: Signalled!
Signal 20 received
5: Started 26861
Signal 02 received
5: Signalled!
Dead: 26860 - 0x0000
Dead: 26859 - 0x0000
Dead: 26858 - 0x0000
Dead: 26857 - 0x0000
Signal 20 received
Dead: 26861 - 0x0000
10
Note that in this code, the signals appear at times other than when `sigsuspend()` is called. If the `SIGCHLD` signal is not trapped, then the code produces a count of 5 reliably (source code `sig23.c`). This also generates different signals (deterministically), and the children exit with different statuses.
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>
/* Number of times the handler has run; written only from h(). */
static volatile sig_atomic_t s = 0;

/* Report template; the "XX" at offsets 7 and 8 is overwritten per signal. */
static char message[] = "Signal XX received\n";

/*
 * Signal handler: count the delivery and report the signal number.
 *
 * n is the number of the signal being delivered.  The handler re-installs
 * itself for that signal, bumps the counter s, patches the two-digit
 * decimal signal number into message and emits it on standard error with
 * write().  Only two digits are produced, so n is expected to be below 100.
 */
static void h(int n)
{
    const int tens = n / 10;
    const int units = n % 10;

    /* Re-arm first, in case the disposition was reset on delivery. */
    signal(n, h);
    ++s;

    message[7] = (char)('0' + tens);
    message[8] = (char)('0' + units);

    /* Drop the terminating NUL from the byte count. */
    write(STDERR_FILENO, message, sizeof(message) - 1);
}
static void err_error(const char *fmt, ...);
/*
 * Fork the requested number of children, one at a time, and count the
 * signals seen by the parent's handler.
 *
 * h() is installed for every signal number except SIGCHLD, SIGKILL and
 * SIGSTOP, so the termination of a child is not counted.  Child number n
 * sends signal n % 3 + 1 to the parent and terminates with that same value
 * as its exit status.  After the loop the parent reaps every child and
 * prints the handler invocation count s.
 *
 * argv[1] is the number of children to create (parsed with atoi()).
 * Returns 0; usage errors and system-call failures exit via err_error().
 */
int main(int argc, char *argv[])
{
    sigset_t ms;
    int n;

    if (argc != 2)
        err_error("Usage: %s num-children\n", argv[0]);

    /* Parse the child count once instead of on every loop iteration. */
    const int num_children = atoi(argv[1]);

    for (n = 1; n < NSIG; ++n)
    {
        /* Leave SIGCHLD alone; SIGKILL and SIGSTOP cannot be caught anyway. */
        if (n != SIGCHLD && n != SIGKILL && n != SIGSTOP)
            signal(n, h);
    }

    /* ms is the empty mask handed to sigsuspend(). */
    if (sigemptyset(&ms) != 0)
        err_error("sigemptyset() failed\n");

    for (n = 1; n <= num_children; ++n)
    {
        pid_t pid = fork();

        if (pid < 0)
            err_error("fork() failed\n");
        else if (pid != 0)
        {
            printf("%d: Started %d\n", n, (int)pid);
            sigsuspend(&ms);
            printf("%d: Signalled!\n", n);
        }
        else
        {
            int sig = n % 3 + 1;

            kill(getppid(), sig);
            /*
             * Use _exit(), not exit(): the child holds a copy of the
             * parent's stdio buffers, and exit() would flush them again,
             * duplicating output when stdout is not line-buffered.
             */
            _exit(sig);
        }
    }

    pid_t corpse;
    int status;

    while ((corpse = wait(&status)) != -1)
        printf("Dead: %5d - 0x%.4X\n", (int)corpse, (unsigned)status);

    printf("%d\n", (int)s);
    return 0;
}
/*
 * Report a fatal error and terminate the process.
 *
 * fmt and the following arguments are interpreted printf-style and written
 * to stderr; the process then exits with status 1.  Does not return.
 */
static void err_error(const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    (void)vfprintf(stderr, fmt, ap);
    va_end(ap);

    exit(1);
}
Sample output:
1: Started 27162
Signal 02 received
1: Signalled!
2: Started 27163
Signal 03 received
2: Signalled!
3: Started 27164
Signal 01 received
3: Signalled!
4: Started 27165
Signal 02 received
4: Signalled!
5: Started 27166
Signal 03 received
5: Signalled!
Dead: 27165 - 0x0200
Dead: 27164 - 0x0100
Dead: 27163 - 0x0300
Dead: 27162 - 0x0200
Dead: 27166 - 0x0300
5
You can go on ringing the changes with the code, tweaking the way that signals are handled. Nevertheless, the fundamental cause of the 'over-count' is that SIGCHLD signals are handled as well as those generated by the child processes calling `kill()`.