4

In a C program, I am using PTHREAD_CANCEL_ASYNCHRONOUS so that the thread is cancelled immediately, as soon as pthread_cancel is called from the parent thread. But this causes the whole process to crash with a segmentation fault. The job of the child thread is to fetch some data from a database server, and my logic is that if it doesn't get the data within 10 seconds, the parent thread should kill it.

I want only to kill the child thread, not the whole process.

/*
 * Record shared between main() and the worker thread started with
 * str_in_thread_call(); main() passes its address as the thread argument.
 */
struct str_thrd_data
{
        SQLHANDLE hstmt;            /* statement handle, filled in by main() before the thread is created */
        int rc;                     /* status value; main() keeps polling while it equals 999 */
        bool thrd_completed_flag;   /* false initially; set to true by the worker once it has finished */
};


/*
 * Worker thread entry point: executes the prepared statement carried in the
 * struct str_thrd_data passed as in_str_arg and reports the outcome through
 * that same structure.
 *
 * in_str_arg  pointer to a struct str_thrd_data owned by the creating thread;
 *             it must stay valid until the thread has been joined.
 * Returns NULL; the SQLExecute() status is delivered in str_arg->rc.
 */
void * str_in_thread_call(void *in_str_arg)
{
        struct str_thrd_data *str_arg = in_str_arg;
        int thrd_rc;
        int sql_rc;

        /* A new thread is already cancelable; this just makes the intent explicit. */
        thrd_rc = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
        if (thrd_rc != 0)
               handle_error_en(thrd_rc, "pthread_setcancelstate");

        /*
         * WARNING: asynchronous cancellation is only safe around pure
         * computation. A pthread_cancel() that arrives while SQLExecute() is
         * blocked inside the database client library unwinds through code that
         * is not prepared for it and can abort the whole process. It is kept
         * here because it is the behaviour under discussion; prefer a
         * library-level timeout or cancel mechanism instead.
         */
        thrd_rc = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, NULL);
        if (thrd_rc != 0)
               handle_error_en(thrd_rc, "pthread_setcanceltype");

        // Code to call SQL Dynamic Query from a Database Server. This takes time more than 10 seconds.
        sql_rc = SQLExecute(str_arg->hstmt);

        /*
         * The query has returned: block cancellation so that publishing the
         * result and calling printf() cannot be interrupted half-way.
         */
        thrd_rc = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
        if (thrd_rc != 0)
               handle_error_en(thrd_rc, "pthread_setcancelstate");

        /*
         * Set the completion flag before the status, so a parent that notices
         * the status change is likely to see the flag as well. These are plain
         * fields with no synchronisation, so this is not a guarantee.
         */
        str_arg->thrd_completed_flag = true;
        str_arg->rc = sql_rc;
        printf("\n*********************Normal Thread termination withing timelimit %d\n",str_arg->rc);

        return NULL;
}

/*
 * Starts the worker thread that runs the query, polls for its result while
 * the countdown in `timeout` lasts, and cancels the worker if it has not
 * reported completion by then.
 *
 * hstmt, rc and timeout are not declared in this excerpt; they are assumed to
 * be declared elsewhere in the full program.
 */
int main()
{
        pthread_t th;
        struct str_thrd_data atd;
        int thrd_rc;

        printf("\nPJH: New THread created.\n");

        atd.hstmt = hstmt;
        atd.rc = rc;
        atd.thrd_completed_flag = false;

        /*
         * Default thread attributes are used. The earlier request for a 1-byte
         * stack is below PTHREAD_STACK_MIN, so pthread_attr_setstacksize()
         * always rejected it and the default stack size was in effect anyway.
         */
        thrd_rc = pthread_create(&th, NULL, &str_in_thread_call, &atd);
        if (thrd_rc != 0)
               handle_error_en(thrd_rc, "pthread_create");

        // While loop to count down the time limit.
        // Each pass sleeps 10 ms (usleep(10000)), so timeout must start at
        // 1000 for the loop to last about 10 seconds.
        while (timeout != 0)
        {
                printf("%d Value of rc=%d\n",timeout, atd.rc);
                if (atd.rc != 999)
                        break;
                timeout--;
                usleep(10000);
        }
        rc = atd.rc;

        // Condition to check if thread is completed or not yet.
        if (!atd.thrd_completed_flag)
        {
                // Thread not completed within time, so kill it now.
                printf("PJH ------- 10 Seconds Over\n");
                thrd_rc = pthread_cancel(th);
                if (thrd_rc != 0)
                {
                       handle_error_en(thrd_rc, "pthread_cancel");
                }
                // Only report the cancellation once the request was accepted.
                printf("PJH ------- Thread Cancelled Immediately \n");
                printf("\nPJH &&&&&&&& Thread Cancelled Manually\n");
        }

        // Always join, so the worker no longer uses atd when main() moves on.
        thrd_rc = pthread_join(th, NULL);
        if (thrd_rc != 0)
               handle_error_en(thrd_rc, "pthread_join");
        // some other job .....
        return 0;
}

Running `gdb process_name corefile` shows the backtrace below; the frames are mostly SQL library functions.

#0  0xffffe410 in __kernel_vsyscall ()
#1  0x0059fe30 in raise () from /lib/libc.so.6
#2  0x005a1741 in abort () from /lib/libc.so.6
#3  0xdef3f5d7 in ?? () from /usr/lib/libstdc++.so.5
#4  0xdef3f624 in std::terminate() () from /usr/lib/libstdc++.so.5
#5  0xdef3f44c in __gxx_personality_v0 () from /usr/lib/libstdc++.so.5
#6  0x007e1917 in ?? () from /lib/libgcc_s.so.1
#7  0x007e1c70 in _Unwind_ForcedUnwind () from /lib/libgcc_s.so.1
#8  0x007cda46 in _Unwind_ForcedUnwind () from /lib/libpthread.so.0
#9  0x007cb471 in __pthread_unwind () from /lib/libpthread.so.0
#10 0x007c347a in sigcancel_handler () from /lib/libpthread.so.0
#11 <signal handler called>
#12 0xffffe410 in __kernel_vsyscall ()
#13 0x0064decb in semop () from /lib/libc.so.6
#14 0xe0245901 in sqloSSemP () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#15 0xe01e7f3c in sqlccipcrecv(sqlcc_comhandle*, sqlcc_cond*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#16 0xe03fe135 in sqlccrecv () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#17 0xe02a0307 in sqljcReceive(sqljCmnMgr*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#18 0xe02d0ba3 in sqljrReceive(sqljrDrdaArCb*, db2UCinterface*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#19 0xe02c510d in sqljrDrdaArExecute(db2UCinterface*, UCstpInfo*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#20 0xe01392bc in CLI_sqlCallProcedure(CLI_STATEMENTINFO*, CLI_ERRORHEADERINFO*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#21 0xe00589c7 in SQLExecute2(CLI_STATEMENTINFO*, CLI_ERRORHEADERINFO*) () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#22 0xe0050fc9 in SQLExecute () from /opt/IBM/db2/V9.1/lib32/libdb2.so.1
#23 0x080a81f7 in apcd_in_thread_call (in_apcd_arg=0xbc8e8f34) at dcs_db2_execute.c:357
#24 0x007c4912 in start_thread () from /lib/libpthread.so.0
#25 0x0064c60e in clone () from /lib/libc.so.6
CodeCodeCode
  • 459
  • 1
  • 12
  • 21
  • Check [this](http://stackoverflow.com/questions/3822674/for-pthread-how-to-kill-child-thread-from-the-main-thread) and [this](http://stackoverflow.com/questions/2084830/kill-thread-in-pthread-library) – Suvarna Pattayil Apr 29 '13 at 14:03

2 Answers2

8

Asynchronous thread cancellation can only be safely used on threads which perform a very restricted set of operations — the official rules are long and confusing, but in effect threads subject to async cancels can only perform pure computation. They can't do I/O, they can't allocate memory, they can't take locks of any kind, and they can't call any library function that might do any of the above. There is no way it is safe to apply async cancels to a thread that talks to a database.

Deferred cancellation is less restricted, but is still extremely finicky. If your database library is not coded to cope with the possibility that the calling thread might be cancelled mid-operation — and it probably isn't — then you can't safely use deferred cancellation, either.

You will need to find some other mechanism for aborting queries which run too long.

EDIT: Since this is DB2 and the confusingly-named "CLI" API, try using SQLSetStmtAttr to set the SQL_ATTR_QUERY_TIMEOUT attribute on the prepared statement. This is the full list of parameters that can be set this way, and here is some more discussion of query timeouts.

SON OF EDIT: According to a friend who has done a lot more database work than me, it is quite likely that there is a server-side mechanism for cancelling slow queries regardless of their source. If this exists in DB2 it may be more convenient than manually setting timeouts on all your queries client-side, especially as it may be able to log slow queries so you know which ones they are and can optimize them.

zwol
  • 135,547
  • 38
  • 252
  • 361
  • 1
    +1 for generally good information, but async-signal-safe is not sufficient. Even calling async-signal-safe functions under asynchronous cancellation mode invokes UB. POSIX only defines three async-**cancel**-safe functions, and they are `pthread_setcancelstate`, `pthread_setcanceltype`, and `pthread_cancel`. So really, the only thing you can do in asynchronous cancellation mode is pure computation, cancelling a thread, or turning off asynchronous cancellation. (Source: XSH 2.9.5) – R.. GitHub STOP HELPING ICE Apr 29 '13 at 14:07
  • @R.. This is what I get for knowing a bit too much about the guts of pthreads; on at least some implementations it *is* (perhaps unofficially, but still) safe to use any async-signal-safe function from a thread in async cancellation mode, and I didn't bother checking the standard. Answer corrected. – zwol Apr 29 '13 at 15:14
  • @Zack I am trying to connect to DB2 Database server V9.7. and the statement I use to retrieve data is `rc = SQLExecute(hstmt);` where hstmt is a dynamically prepared SQL statement. – CodeCodeCode Apr 29 '13 at 21:45
  • @PalLoveCoding I dug into the DB2 documentation a little and found something that should work, see edits. – zwol Apr 29 '13 at 22:58
  • @Zack Thanks for the `SQL_ATTR_QUERY_TIMEOUT` option suggestion. But here I got to know a new thing, that the DB2 installed on the Mainframe does not support the TIMEOUT feature. Now I am thinking of any other approach. – CodeCodeCode May 07 '13 at 09:49
  • 1
    Your best option in that case is to find the people in charge of the mainframe and bribe, blackmail, or otherwise coerce them into turning the TIMEOUT feature (back) on. I am not joking even a little. – zwol May 07 '13 at 13:01
2

Since the database client code is probably not written in such a way that it can deal with cancellation (most library code isn't), I don't think this approach will work. See Zack's answer for details.

If you need to be able to cancel database connections, you will probably have to proxy the connection and kill the proxy. Basically, what you would do is create a second thread that listens on a port and forwards the connection to the database server, and direct your database client to connect to this port on localhost instead of the real database server/port. The proxy thread could then be cancellable (with normal deferred cancellation, not asynchronous), with a cancellation cleanup handler to shutdown the sockets. Losing connection to the database server via a closed socket (rather than just a non-responsive socket) should cause the database client library code to return with an error, and you can then have its thread exit too.

Keep in mind when setting up such a proxy that you will need to make sure you don't introduce security issues with access to the database.

Here is a sketch of the code you could use for a proxy, without any error checking logic and without anything to account for unintended clients connecting:

/* Sketch only: no error checking, and nothing to reject unintended clients. */
int s, c;
int port;
struct addrinfo *ai;
struct sockaddr_in sa;
/* Service "0" requests a kernel-assigned port; getaddrinfo() fails when both node and service are null. */
getaddrinfo(0, "0", &(struct addrinfo){ .ai_flags = AI_PASSIVE, .ai_family = AF_INET, .ai_socktype = SOCK_STREAM }, &ai);
s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
bind(s, ai->ai_addr, ai->ai_addrlen);
freeaddrinfo(ai);
getsockname(s, (void *)&sa, &(socklen_t){sizeof sa});
port = ntohs(sa.sin_port);
/* Here, do something to pass the port (assigned by kernel) back to the caller. */
listen(s, 1);
c = accept(s, (void *)&sa, &(socklen_t){sizeof sa});
close(s);
/* Restrict the lookup to stream sockets so the first result is usable with connect(). */
getaddrinfo("dbserver", "dbport", &(struct addrinfo){ .ai_socktype = SOCK_STREAM }, &ai);
s = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
connect(s, ai->ai_addr, ai->ai_addrlen);
freeaddrinfo(ai);

At this point, you have two sockets, s connected to the database server, and c connected to the database client in another thread of your program. Whatever you read from one should be written to the other; use poll to detect which one is ready for reading or writing.

During the above setup code, cancellation should be blocked except around the accept and connect calls, and at those points, you need appropriate cleanup handlers to close your sockets and call freeaddrinfo if cancellation happens. It might make sense to copy the data you're using from getaddrinfo to local variables so you can freeaddrinfo before the blocking calls and not have to worry about doing it from a cancellation cleanup handler.

R.. GitHub STOP HELPING ICE
  • 208,859
  • 35
  • 376
  • 711
  • I also thought of this second-thread approach, but I don't have hands-on experience with ports/sockets communication in C. Could you please point me to a link or any material to go through? – CodeCodeCode May 07 '13 at 09:52
  • I added a *sketch* (completely untested) of the code that would be involved in setting up a proxy. I would just point you to a sockets programming guide, but I'm not sure which ones are teaching bad practices, and finding one that's not would probably take more time than it took me to write that sketch... You could open a new SO question asking how to do this and pointing at my answer as a starting point. – R.. GitHub STOP HELPING ICE May 07 '13 at 16:13