2

When I was working on a TCP server running in a dedicated thread, I noticed strange behavior in signal handling. I have prepared the following MWE (I've used cerr to avoid race conditions in the debug printing):

#include <signal.h>
#include <unistd.h>

#include <iostream>
#include <thread>
#include <chrono>

using namespace std;

#undef THREAD

// Tracing helper: writes one line to cerr when an instance is constructed and
// another when it is destroyed.
class RaiiObject
{
public:
    RaiiObject()
    {
        cerr << "RaiiObject ctor" << endl;
    }

    ~RaiiObject()
    {
        cerr << "RaiiObject dtor" << endl;
    }
};

// Signal handler installed for SIGINT: writes "Signal\n" to file descriptor 2
// using write() directly (no iostream calls inside the handler).
static void signalHandler(int sig)
{
    static const char message[] = "Signal\n";

    (void)sig;  // the signal number is not used
    write(STDERR_FILENO, message, sizeof(message) - 1);
}

// Adds SIGINT to the calling thread's blocked-signal mask.
//
// pthread_sigmask() is used instead of sigprocmask() because this program can
// be built with a second thread, and POSIX leaves the behaviour of
// sigprocmask() unspecified in a multi-threaded process. pthread_sigmask()
// has the same arguments and is well-defined per thread.
static void blockSigint()
{
    sigset_t blockset;

    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}

// Installs signalHandler as the process-wide disposition for SIGINT.
//
// The sigaction structure is value-initialized first so that any member not
// assigned below is zero rather than indeterminate when it is handed to
// sigaction().
static void setSigintHandler()
{
    struct sigaction sa = {};

    sa.sa_handler = signalHandler;
    sa.sa_flags = 0;            // no SA_RESTART or other flags
    sigemptyset(&sa.sa_mask);   // block no additional signals while handling
    sigaction(SIGINT, &sa, NULL);
}

// Installs the SIGINT handler and then waits on stdin (fd 0) with pselect().
// Every time pselect() reports a ready descriptor "Select" is logged and the
// wait is repeated; as soon as it returns 0 or a negative value, "Select break"
// is logged and the function returns.
void runSelect()
{
    // Signal mask in effect while pselect() is waiting: no signal blocked.
    sigset_t noSignalsBlocked;
    sigemptyset(&noSignalsBlocked);

    setSigintHandler();

    RaiiObject scopeTracer{};
    fd_set readFds;

    for (;;) {
        cerr << "Loop iteration" << endl;
        FD_ZERO(&readFds);
        FD_SET(0, &readFds);

        // This inner loop is only ever left through the return below, so the
        // enclosing loop body executes once.
        for (;;) {
            const int ready = pselect(FD_SETSIZE, &readFds, NULL, NULL, NULL, &noSignalsBlocked);
            if (ready <= 0) {
                cerr << "Select break" << endl;
                return;
            }
            cerr << "Select" << endl;
        }
    }
}

// Entry point of the demo. Without THREAD defined, runSelect() is called
// directly; with THREAD defined, it is run on a separate thread that is joined
// immediately.
int main()
{
    cerr << "Main start" << endl;

#ifndef THREAD
    runSelect();
#else
    cerr << "Thread start" << endl;
    // Enabling the call below blocks SIGINT in this thread before the worker
    // is created.
    //blockSigint();
    thread worker{runSelect};
    worker.join();
#endif

    cerr << "Main exit" << endl;

    return EXIT_SUCCESS;
}

When I compile a single-threaded program (#undef THREAD), I can correctly terminate the runSelect() function with Ctrl-C:

Main start
RaiiObject ctor
Loop iteration
^CSignal
Select break
RaiiObject dtor
Main exit

But when I compile a multithreaded (#define THREAD) program, it hangs at the signal handler:

Main start
RaiiObject ctor
Loop iteration
^CSignal

Only when I block the signal on the main thread with blockSigint() does the program work again as I want.

I've examined the program with strace -tt -f and I noticed that working versions use pselect6() with ERESTARTNOHAND:

14:46:53.543360 write(2, "Loop iteration", 14Loop iteration) = 14
14:46:53.543482 write(2, "\n", 1
)       = 1
14:46:53.543586 pselect6(1024, [0], NULL, NULL, NULL, {[], 8}) = ? ERESTARTNOHAND (To be restarted if no handler)
14:46:55.286989 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
14:46:55.287120 write(2, "Signal\n", 7Signal
) = 7
14:46:55.287327 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
14:46:55.287569 write(2, "Select break", 12Select break) = 12
14:46:55.287760 write(2, "\n", 1

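but in the broken version the signal is delivered to the other thread (pid 3469010), which is waiting in futex(), while pselect6() is left unfinished: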

[pid 3469011] 14:48:37.211792 write(2, "Loop iteration", 14Loop iteration) = 14
[pid 3469011] 14:48:37.211916 write(2, "\n", 1
) = 1
[pid 3469011] 14:48:37.212031 pselect6(1024, [0], NULL, NULL, NULL, {[], 8} <unfinished ...>
[pid 3469010] 14:48:40.046146 <... futex resumed>) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
[pid 3469010] 14:48:40.046256 --- SIGINT {si_signo=SIGINT, si_code=SI_USER, si_pid=2707461, si_uid=1000} ---
[pid 3469010] 14:48:40.046354 write(2, "Signal\n", 7Signal
) = 7
[pid 3469010] 14:48:40.046588 rt_sigreturn({mask=[]}) = -1 EINTR (Interrupted system call)
[pid 3469010] 14:48:40.046821 futex(0x7f4e5c16b9d0, FUTEX_WAIT, 3469011, NULL) = ? ERESTARTSYS (To be restarted if SA_RESTART is set)
vmario
  • 411
  • 3
  • 15
  • 1
    `cerr << "Signal: " << sig << endl;` -- Game Over. Nothing in iostreams is signal-safe. You can't do this in a signal handler. – Sam Varshavchik Mar 17 '21 at 12:58
  • @SamVarshavchik I doubt that's the problem here though. Seems like the signal is getting delivered to the main thread – user253751 Mar 17 '21 at 13:07
  • Oh, I have absolutely no doubt that the signal is "getting delivered". That's not what the issue is. – Sam Varshavchik Mar 17 '21 at 13:09
  • @SamVarshavchik I've changed signal handler and now I use signal-safe `write()` but it doesn't change anything (even when I used an empty handler). – vmario Mar 17 '21 at 13:12
  • 2
    https://unix.stackexchange.com/questions/225687 is there useful background material in there? It also points back to SO: https://stackoverflow.com/questions/11679568 – Jeffrey Mar 17 '21 at 13:22
  • Except that now, undefined behavior is out of the question, and won't interfere. The next step I would take is run this under `strace -f`, and see what comes out. – Sam Varshavchik Mar 17 '21 at 13:23
  • @SamVarshavchik OK, I added differences in `strace` output. – vmario Mar 17 '21 at 14:04

2 Answers2

1

Do I have to block signals on the main thread to handle cancel point on another thread?

You need to allow (unmask) signals in only those threads expected to handle them, and block them in others.

The OS will deliver a process-directed signal to any thread that can receive it. Your terminal's SIGINT is sent to each process in the foreground process group, and the OS decides which thread of each will receive it.

If you only have two threads, and one of them has atomically unmasked SIGINT in a pselect while the other has SIGINT blocked, then the OS will deliver the SIGINT to the former. If both (or neither) can handle a SIGINT, the OS will pick one of them.

Caveat: your code may "miss" a SIGINT generated when both threads have INT masked:

time  thr1        thr2
----  ----------  ------
  0   block(INT)   - 
  1   run thread  (awake)    <---- SIGINT
  3   join()      pselect()
  4   ...         ...

If the signal arrives outside of thr2's pselect, the OS will find that both threads have the signal blocked. In that case, the OS can choose whichever thread it likes for the signal to be held pending, and could choose thr1, which will never unblock. The SIGINT will be missed.

That may be fine for your application, or it may not be.

pilcrow
  • 56,591
  • 13
  • 94
  • 135
  • So I have a problem because `sigaction()` connects signal handler to all threads and `sigprocmask()` helps because it disables signal only in current thread? – vmario Mar 18 '21 at 08:28
  • Signal disposition is global, whereas signal masks are thread-specific (and inherited). – pilcrow Mar 18 '21 at 13:04
0

As you noticed, my problem was that sigaction() installs the signal handler for both the main() thread and the runSelect() thread, so the SIGINT signal could be caught by main().

Now I have prepared a version in which only the main thread handles the SIGINT signal and sends the SIGUSR1 signal to specific threads with pthread_kill().

#include <signal.h>
#include <unistd.h>

#include <iostream>
#include <thread>
#include <chrono>

using namespace std;

// Native pthread handle of the thread running runSelect(); assigned in main()
// and read by sigintHandler() as the target of pthread_kill().
pthread_t nativeHandle;

// Scope tracer: logs "RaiiObject ctor" to cerr on construction and
// "RaiiObject dtor" on destruction.
class RaiiObject
{
public:
    RaiiObject()
    {
        cerr << "RaiiObject ctor" << endl;
    }

    ~RaiiObject()
    {
        cerr << "RaiiObject dtor" << endl;
    }
};

// SIGINT handler: writes "INT\n" to file descriptor 2, then forwards the
// event to the thread stored in nativeHandle by sending it SIGUSR1.
static void sigintHandler(int)
{
    static const char tag[] = "INT\n";

    write(STDERR_FILENO, tag, sizeof(tag) - 1);
    pthread_kill(nativeHandle, SIGUSR1);
}

// SIGUSR1 handler: writes "USR\n" to file descriptor 2 and does nothing else.
static void sigusrHandler(int)
{
    static const char tag[] = "USR\n";

    write(STDERR_FILENO, tag, sizeof(tag) - 1);
}

// Adds SIGINT to the calling thread's blocked-signal mask.
//
// This is called from the runSelect() thread, i.e. in a multi-threaded
// process, where POSIX leaves the behaviour of sigprocmask() unspecified.
// pthread_sigmask() takes the same arguments and is defined per thread.
static void blockSigint()
{
    sigset_t blockset;

    sigemptyset(&blockset);
    sigaddset(&blockset, SIGINT);
    pthread_sigmask(SIG_BLOCK, &blockset, NULL);
}

// Installs sigintHandler as the process-wide disposition for SIGINT.
//
// The sigaction structure is value-initialized first so that any member not
// assigned below is zero rather than indeterminate when it is handed to
// sigaction().
static void setSigintHandler()
{
    struct sigaction sa = {};

    sa.sa_handler = sigintHandler;
    sa.sa_flags = 0;            // no SA_RESTART or other flags
    sigemptyset(&sa.sa_mask);   // block no additional signals while handling
    sigaction(SIGINT, &sa, NULL);
}

// Installs sigusrHandler as the process-wide disposition for SIGUSR1.
//
// The sigaction structure is value-initialized first so that any member not
// assigned below is zero rather than indeterminate when it is handed to
// sigaction().
static void setSigusrHandler()
{
    struct sigaction sa = {};

    sa.sa_handler = sigusrHandler;
    sa.sa_flags = 0;            // no SA_RESTART or other flags
    sigemptyset(&sa.sa_mask);   // block no additional signals while handling
    sigaction(SIGUSR1, &sa, NULL);
}

// Worker-thread body: waits once on stdin (fd 0) with pselect() and returns
// when either input is ready ("Select") or the wait ends otherwise, e.g.
// because SIGUSR1 interrupted it ("Select break").
//
// Signal handling in this thread:
//  - SIGINT stays blocked the whole time, including inside pselect(), so that
//    it is never handled here.
//  - SIGUSR1 is blocked everywhere except inside pselect(). pselect() swaps in
//    waitMask atomically, so a SIGUSR1 sent before the wait starts stays
//    pending and interrupts the wait instead of being handled too early and
//    leaving pselect() blocked forever.
void runSelect()
{
    blockSigint();

    sigset_t usr1Set;
    sigemptyset(&usr1Set);
    sigaddset(&usr1Set, SIGUSR1);
    pthread_sigmask(SIG_BLOCK, &usr1Set, NULL);

    setSigusrHandler();

    // Mask used while waiting: this thread's current mask with only SIGUSR1
    // removed (a NULL set makes pthread_sigmask() just report the mask).
    sigset_t waitMask;
    sigemptyset(&waitMask);
    pthread_sigmask(SIG_BLOCK, NULL, &waitMask);
    sigdelset(&waitMask, SIGUSR1);

    RaiiObject scopeTracer{};
    fd_set fdRead;

    // Both outcomes of the wait end the function, so no loop is needed.
    cerr << "Loop iteration" << endl;
    FD_ZERO(&fdRead);
    FD_SET(0, &fdRead);

    if (pselect(FD_SETSIZE, &fdRead, NULL, NULL, NULL, &waitMask) > 0) {
        cerr << "Select" << endl;
    } else {
        cerr << "Select break" << endl;
    }
}

// Entry point: starts runSelect() on its own thread, records that thread's
// native handle, installs the SIGINT handler and waits for the thread to end.
int main()
{
    cerr << "Main start" << endl;
    cerr << "Thread start" << endl;

    thread worker{runSelect};

    // The handle is stored before the SIGINT handler is installed, so
    // sigintHandler() never runs with nativeHandle still unassigned.
    nativeHandle = worker.native_handle();
    setSigintHandler();

    worker.join();

    cerr << "Main exit" << endl;

    return EXIT_SUCCESS;
}
Main start
Thread start
RaiiObject ctor
Loop iteration
^CINT
USR
Select break
RaiiObject dtor
Main exit
vmario
  • 411
  • 3
  • 15