4

[EDIT: added MCVE in the text, clarifications]

I have the following program that sets RLIMIT_CPU to 2 seconds using setrlimit() and catches the signal. RLIMIT_CPU limits CPU time. «When the process reaches the soft limit, it is sent a SIGXCPU signal. The default action for this signal is to terminate the process. However, the signal can be caught, and the handler can return control to the main program.» (man)

The following program sets RLIMIT_CPU and installs a signal handler for SIGXCPU, then generates random numbers until SIGXCPU is raised; the signal handler simply exits the program.

test_signal.cpp

/*
 * Test program for signal handling on CMS.
 *
 * Compile with:
 *     /usr/bin/g++ [-DDEBUG] -Wall -std=c++11 -O2 -pipe -static -s \
 *                  -o test_signal test_signal.cpp
 *
 * The option -DDEBUG activates some debug logging in the helpers library. 
*/
#include <iostream>
#include <fstream>
#include <random>
#include <chrono>
#include <iostream>
#include <unistd.h>
#include <csignal>
#include <sys/time.h>
#include <sys/resource.h>

using namespace std;

namespace helpers {
    // Soft limit on CPU time (in seconds) installed by setup_time_limit().
    const unsigned timelimit = 2;

    // Process CPU time, in nanoseconds, sampled by setup(); it stays at -1
    // until setup() has run.
    long long start_time = -1;

    // Raised by signal_handler() and polled through has_reached_timeout().
    volatile sig_atomic_t timeout_flag = 0;

    // One-shot initialisation: records start_time, then installs the signal
    // handler and the CPU time limit.
    void setup();
    void setup_signal();
    void setup_time_limit();

    static void signal_handler(int signum);

    bool has_reached_timeout();
    long long get_elapsed_time();
}

namespace {
   // Inclusive bounds of the random integers drawn in main().
   constexpr unsigned minrand = 5;
   constexpr unsigned maxrand = 20;

   // Number of random draws performed per iteration of main()'s outer loop.
   constexpr int numcycles = 5000000;
}


/*
 * Very simple debug logger, enabled at compile time with -DDEBUG.
 * If enabled, it prints on stderr, otherwise it does nothing (it does not
 * even evaluate the expressions streamed into it).
 *
 * Main ideas taken from:
 *   - C++ enable/disable debug messages of std::couts on the fly
 *     (https://stackoverflow.com/q/3371540/2377454)
 *   - Standard no-op output stream
 *     (https://stackoverflow.com/a/11826787/2377454)
*/
// `debug` is a compile-time boolean: true when built with -DDEBUG.
#ifdef DEBUG
#define debug true
#else
#define debug false
#endif


// `debug_logger << a << b;` expands to a complete if/else statement. The
// empty `if` branch is taken when debugging is off, so the streamed
// expressions are never evaluated; because the `else` is already present,
// a later `else` in the calling code cannot attach to this `if`.
// Every message is prefixed with "[DEBUG] helpers::".
#define debug_logger if (!debug) \
                        {} \
                     else \
                        cerr << "[DEBUG] helpers::"

// Conversion factor between seconds and nanoseconds. The LL suffix makes the
// constant a long long, so that `tv_sec * NANOS` is computed in 64 bits even
// on platforms where time_t is only 32 bits wide (a plain int constant would
// overflow there after about two seconds).
#define NANOS 1000000000LL

// Signal to handle, and the resource limit whose soft value triggers it.
#define SIGNAL SIGXCPU
#define TIMELIMIT RLIMIT_CPU

/*
 * This could be a function factory where and a closure of the signal-handling
 * function so that we could explicitly pass the output ofstream and close it.
 * C++ support closures only for lambdas, alas, at the moment we also need
 * the signal-handling function to be a pointer to a function and lambaa are
 * a different object that can not be converted. See:
 *  - Passing lambda as function pointer
 *    (https://stackoverflow.com/a/28746827/2377454)
*/
void helpers::signal_handler(int signum) {
    helpers::timeout_flag = true;

    debug_logger << "signal_handler:\t" << "signal " << signum \
                 << " received" << endl;
    debug_logger << "signal_handler:\t" << "exiting after " \
                 << helpers::get_elapsed_time() << " microseconds" << endl;

    exit(0);
}


/*
 * Install signal_handler() as the handler for SIGNAL (SIGXCPU) using
 * sigaction(). If the handler cannot be installed the error is reported with
 * perror() and the program exits with EXIT_FAILURE. See
 *   - https://stackoverflow.com/q/4863420/2377454
 *   - https://stackoverflow.com/a/17572787/2377454
*/
void helpers::setup_signal() {
    debug_logger << "set_signal:\t" << "set_signal() called" << endl;

    // Value-initialise the struct so that no field is left indeterminate.
    struct sigaction new_action = {};

    // Set the handler in the new_action struct, with no special flags.
    new_action.sa_handler = signal_handler;
    new_action.sa_flags = 0;

    // sa_mask lists the signals blocked while the handler runs: start from
    // the empty set and add only SIGNAL itself.
    sigemptyset(&new_action.sa_mask);
    sigaddset(&new_action.sa_mask, SIGNAL);

    // Install the new action; without it the time limit would fall back to
    // the default disposition, so a failure here is fatal.
    if (sigaction(SIGNAL, &new_action, nullptr) != 0) {
        perror("error calling sigaction()");
        exit(EXIT_FAILURE);
    }

    if (debug) {
        // Read back the action now associated with SIGNAL.
        struct sigaction current = {};
        if (sigaction(SIGNAL, nullptr, &current) != 0) {
            perror("error calling sigaction()");
            exit(EXIT_FAILURE);
        }

        // Streaming a function pointer directly converts it to bool and
        // always prints "1"; cast to void* to log the actual address.
        debug_logger << "set_signal:\t" << "action.sa_handler: "
                     << reinterpret_cast<void *>(current.sa_handler) << endl;
    }
}


/*
 * Install the soft CPU time limit.
 * RLIMIT_CPU expresses the CPU time limit in seconds; only the soft value
 * (rlim_cur) is replaced with helpers::timelimit, the hard value read from
 * getrlimit() is passed back unchanged. Any failure of getrlimit() or
 * setrlimit() is reported with perror() and ends the program.
 * See:
 *   - https://www.go4expert.com/articles/
 *         getrlimit-setrlimit-control-resources-t27477/
 *   - https://gist.github.com/Leporacanthicus/11086960
*/
void helpers::setup_time_limit(void) {
    debug_logger << "set_limit:\t\t" << "set_limit() called" << endl;

    // Fetch the current pair of limits so the hard one can be preserved.
    struct rlimit cpu_limit;
    const int read_status = getrlimit(TIMELIMIT, &cpu_limit);
    if (read_status != 0) {
        perror("error calling getrlimit()");
        exit(EXIT_FAILURE);
    }

    cpu_limit.rlim_cur = helpers::timelimit;

    const int write_status = setrlimit(TIMELIMIT, &cpu_limit);
    if (write_status != 0) {
        perror("error calling setrlimit()");
        exit(EXIT_FAILURE);
    }

    if (!debug) {
        return;
    }

    // Debug builds read the limit back and log the value now in force.
    struct rlimit applied;
    getrlimit(TIMELIMIT, &applied);
    debug_logger << "set_limit:\t\t" << "current limit: "
                 << applied.rlim_cur << " seconds" << endl;
}


/*
 * Initialise the helpers: record the CPU time consumed so far as the
 * reference point for get_elapsed_time(), then install the signal handler
 * and the CPU time limit.
 * If the process CPU clock cannot be read, the error is reported with
 * perror() and the program exits with EXIT_FAILURE.
*/
void helpers::setup(void) {
    struct timespec start;
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &start) != 0) {
        perror("error calling clock_gettime()");
        exit(EXIT_FAILURE);
    }

    // Widen tv_sec before multiplying so the product is computed in 64 bits
    // regardless of the width of time_t and of the type of NANOS.
    start_time = static_cast<long long>(start.tv_sec) * NANOS + start.tv_nsec;

    setup_signal();
    setup_time_limit();
}


/*
 * Return the CPU time consumed by the process since setup() recorded
 * start_time, in microseconds, rounded to the nearest microsecond.
 * Exits with EXIT_FAILURE if the process CPU clock cannot be read.
*/
long long helpers::get_elapsed_time(void) {
    struct timespec current;
    if (clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &current) != 0) {
        exit(EXIT_FAILURE);
    }

    // Widen tv_sec before multiplying so the product is computed in 64 bits
    // regardless of the width of time_t and of the type of NANOS.
    const long long current_time =
        static_cast<long long>(current.tv_sec) * NANOS + current.tv_nsec;
    const long long elapsed_nanos = current_time - start_time;

    // Nanoseconds -> microseconds: truncate, then add one when the discarded
    // remainder is at least half a microsecond.
    return elapsed_nanos / 1000 + (elapsed_nanos % 1000 >= 500);
}


/* Tell whether helpers::timeout_flag has been raised by the signal handler. */
bool helpers::has_reached_timeout(void) {
    const bool timed_out = (helpers::timeout_flag != 0);
    return timed_out;
}


int main() {
    helpers::setup();

    ifstream in("input.txt");
    in.close();

    ofstream out("output.txt");

    random_device rd;
    mt19937 eng(rd());
    uniform_int_distribution<> distr(minrand, maxrand);

    int i = 0;
    while(!helpers::has_reached_timeout()) {
        int nmsec;

        for(int n=0; n<numcycles; n++) {
            nmsec = distr(eng);
        }

        cout << "i: " << i << "\t- nmsec: " << nmsec << "\t- ";
        out << "i: " << i << "\t- nmsec: " << nmsec << "\t- ";

        cout << "program has been running for " << \
            helpers::get_elapsed_time() << " microseconds" << endl;

        out << "program has been running for " << \
            helpers::get_elapsed_time() << " microseconds" << endl;

        i++;
    }

   return 0;
}

I compile it as follows:

/usr/bin/g++ -DDEBUG -Wall -std=c++11 -O2 -pipe -static -s -o test_signal test_signal.cpp

On my laptop it correctly gets a SIGXCPU after 2 seconds, see the output:

$ /usr/bin/time -v  ./test_signal                                                                     
[DEBUG] helpers::set_signal:    set_signal() called
[DEBUG] helpers::set_signal:    action.sa_handler: 1
[DEBUG] helpers::set_limit:     set_limit() called
[DEBUG] helpers::set_limit:     current limit: 2 seconds
i: 0    - nmsec: 11 - program has been running for 150184 microseconds
i: 1    - nmsec: 18 - program has been running for 294497 microseconds
i: 2    - nmsec: 9  - program has been running for 422220 microseconds
i: 3    - nmsec: 5  - program has been running for 551882 microseconds
i: 4    - nmsec: 20 - program has been running for 685373 microseconds
i: 5    - nmsec: 16 - program has been running for 816642 microseconds
i: 6    - nmsec: 9  - program has been running for 951208 microseconds
i: 7    - nmsec: 20 - program has been running for 1085614 microseconds
i: 8    - nmsec: 20 - program has been running for 1217199 microseconds
i: 9    - nmsec: 12 - program has been running for 1350183 microseconds
i: 10   - nmsec: 17 - program has been running for 1486431 microseconds
i: 11   - nmsec: 13 - program has been running for 1619845 microseconds
i: 12   - nmsec: 20 - program has been running for 1758074 microseconds
i: 13   - nmsec: 11 - program has been running for 1895408 microseconds
[DEBUG] helpers::signal_handler:    signal 24 received
[DEBUG] helpers::signal_handler:    exiting after 2003326 microseconds
    Command being timed: "./test_signal"
    User time (seconds): 1.99
    System time (seconds): 0.00
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:02.01
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1644
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 59
    Voluntary context switches: 1
    Involuntary context switches: 109
    Swaps: 0
    File system inputs: 0
    File system outputs: 16
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

If I compile and run in a virtual machine (VirtualBox, running Ubuntu), I get this:

$ /usr/bin/time -v  ./test_signal                                                                     
[DEBUG] helpers::set_signal:    set_signal() called
[DEBUG] helpers::set_signal:    action.sa_handler: 1
[DEBUG] helpers::set_limit:     set_limit() called
[DEBUG] helpers::set_limit:     current limit: 2 seconds
i: 0    - nmsec: 12 - program has been running for 148651 microseconds
i: 1    - nmsec: 13 - program has been running for 280494 microseconds
i: 2    - nmsec: 7  - program has been running for 428390 microseconds
i: 3    - nmsec: 5  - program has been running for 580805 microseconds
i: 4    - nmsec: 10 - program has been running for 714362 microseconds
i: 5    - nmsec: 19 - program has been running for 846853 microseconds
i: 6    - nmsec: 20 - program has been running for 981253 microseconds
i: 7    - nmsec: 7  - program has been running for 1114686 microseconds
i: 8    - nmsec: 7  - program has been running for 1249530 microseconds
i: 9    - nmsec: 12 - program has been running for 1392096 microseconds
i: 10   - nmsec: 20 - program has been running for 1531859 microseconds
i: 11   - nmsec: 19 - program has been running for 1667021 microseconds
i: 12   - nmsec: 13 - program has been running for 1818431 microseconds
i: 13   - nmsec: 17 - program has been running for 1973182 microseconds
i: 14   - nmsec: 7  - program has been running for 2115423 microseconds
i: 15   - nmsec: 20 - program has been running for 2255140 microseconds
i: 16   - nmsec: 13 - program has been running for 2394162 microseconds
i: 17   - nmsec: 10 - program has been running for 2528274 microseconds
i: 18   - nmsec: 15 - program has been running for 2667978 microseconds
i: 19   - nmsec: 8  - program has been running for 2803725 microseconds
i: 20   - nmsec: 9  - program has been running for 2940610 microseconds
i: 21   - nmsec: 19 - program has been running for 3075349 microseconds
i: 22   - nmsec: 14 - program has been running for 3215255 microseconds
i: 23   - nmsec: 5  - program has been running for 3356515 microseconds
i: 24   - nmsec: 5  - program has been running for 3497369 microseconds
[DEBUG] helpers::signal_handler:    signal 24 received
[DEBUG] helpers::signal_handler:    exiting after 3503271 microseconds
    Command being timed: "./test_signal"
    User time (seconds): 3.50
    System time (seconds): 0.00
    Percent of CPU this job got: 99%
    Elapsed (wall clock) time (h:mm:ss or m:ss): 0:03.52
    Average shared text size (kbytes): 0
    Average unshared data size (kbytes): 0
    Average stack size (kbytes): 0
    Average total size (kbytes): 0
    Maximum resident set size (kbytes): 1636
    Average resident set size (kbytes): 0
    Major (requiring I/O) page faults: 0
    Minor (reclaiming a frame) page faults: 59
    Voluntary context switches: 0
    Involuntary context switches: 106
    Swaps: 0
    File system inputs: 0
    File system outputs: 16
    Socket messages sent: 0
    Socket messages received: 0
    Signals delivered: 0
    Page size (bytes): 4096
    Exit status: 0

Even when I run, in the virtual machine, the binary that was compiled on my laptop, the process only gets killed after around 3 seconds of elapsed user time.

Any idea of what could be causing this? For broader context, see this thread: https://github.com/cms-dev/cms/issues/851

CristianCantoro
  • 722
  • 1
  • 7
  • 17

0 Answers0