1

so I'm updating the code again as recommended by useless so that you can see something that you can compile and run. I cleaned the code a bit from other things, but the problem remains.Executing it with valgrind when I give the SIGINT signal the program ends by printing the terminated threads correctly, but sometimes it ends without memory leaks, others reporting some memory leaks that I leave behind. At the moment I am doing tests without connected clients.


I am trying to create a multithreaded server that uses the select() function and a Thread Pool for a university project. Executing my code with Valgrind I see that after sending a SIGINT signal, sometimes it terminates without errors, while other times it reports 4 "still reachable" errors.

I launch the program with valgrind specifying the following flags:

 valgrind --leak-check=full --show-leak-kinds=all

This is the new code

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>
#include <signal.h>
#include <pthread.h>

#include <errno.h>
#include <sys/select.h>
#include <ctype.h>
#include <sys/types.h> 
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/time.h>

#define UNIXPATH "./tmp/socket"
#define THREADSINPOOL 8
#define MAXCONNECTION 32
#define SYSCALL(r,c,msg) \
    if((r=c)==-1) {perror(msg); exit(errno); }


typedef struct _elem{
    long connfd;
    struct _elem *next;
}elem;

static volatile sig_atomic_t terminate=0;

int try=0;
int nelem=0;
elem *head=NULL;
elem *corr=NULL;
elem *tmp=NULL;


pthread_mutex_t mtx=PTHREAD_MUTEX_INITIALIZER;  
pthread_cond_t empty=PTHREAD_COND_INITIALIZER;
pthread_cond_t full=PTHREAD_COND_INITIALIZER;

int update(fd_set set, int fdmax){
    for(int i=(fdmax-1);i>=0; i--)
        if(FD_ISSET(i,&set)) return i;
    return -1;
}

void *threadF(void *arg){
    pthread_t self;
    long connfd=0;

    while(1){
        pthread_mutex_lock(&mtx);
        if(try==1){
            pthread_mutex_unlock(&mtx);
            break;
        } 
        while(nelem==0 && try==0)
            pthread_cond_wait(&empty,&mtx);
        if(try==1){ 
            pthread_mutex_unlock(&mtx);
            break;
        }
        nelem--;

        tmp=head;
        connfd=tmp->connfd;
        tmp=tmp->next;
        free(head); 
        head=tmp;

        pthread_cond_signal(&full);
        pthread_mutex_unlock(&mtx);

        //read & write on tmp->connfd

        self = pthread_self();
        printf("tmp->connfd: %lu on thread: %lu\n", connfd,self);
        close(connfd);
    }
    pthread_exit(0);        
}

void insert(long connfd){
    pthread_mutex_lock(&mtx);
    while(nelem==MAXCONNECTION && try==0)
        pthread_cond_wait(&full,&mtx);
    if(try==1){ 
        pthread_mutex_unlock(&mtx);

    }else{
        nelem++;
        elem *new=malloc(sizeof(elem));
        new->connfd=connfd;
        new->next=NULL;
        if(head==NULL){
            head=new;
            corr=head;
        }else{
            corr->next=new;
            corr=corr->next;
        }
        pthread_cond_signal(&empty);
        pthread_mutex_unlock(&mtx);
    }
}

pthread_t *createarray(pthread_t *array){
    int i,err;
    pthread_t id;
    for(i=0;i<THREADSINPOOL;i++){
        if((err=pthread_create(&id,NULL,&threadF,NULL))!=0){
            fprintf(stderr,"thread\n");
            exit(errno);
        }
        array[i]=id;
    }
    return array;

}

void destroyArray(pthread_t *array){
    void* value=NULL;
    int i,tmp;
    for (i = 0; i < THREADSINPOOL; i++){
        if ((tmp=pthread_join(array[i],&value)) != 0)
                printf("error join: %d\n", tmp);
        printf("thread: %lu terminated\n",array[i]);
    }
    free(array);
}

void cleanup(){
    unlink(UNIXPATH);
}

void sigint_handler(int signmum){
    terminate=1;

}

int main(int argc, char *argv[]) {
    cleanup();
    atexit(cleanup);
    int notused;

    sigset_t setmask;
    SYSCALL(notused,sigemptyset(&setmask),"SIGEMPTYSET");
    SYSCALL(notused,sigaddset(&setmask,SIGINT),"SIGADDSET");
    SYSCALL(notused,pthread_sigmask(SIG_SETMASK,&setmask,NULL),"pthread_sigmask");

    //create threadspool
    pthread_t *array=malloc(THREADSINPOOL*sizeof(pthread_t));
    array=createarray(array);

    struct sigaction s;
    memset(&s,0,sizeof(s));

    s.sa_handler=sigint_handler;
    s.sa_flags=SA_RESTART;
    SYSCALL(notused,sigaction(SIGINT,&s,NULL),"SIGINT");

    SYSCALL(notused,sigemptyset(&setmask),"SIGEMPTYSET");
    SYSCALL(notused,pthread_sigmask(SIG_SETMASK,&setmask,NULL),"sigmask");


    int listenfd;
    SYSCALL(listenfd, socket(AF_UNIX, SOCK_STREAM, 0), "socket");

    struct sockaddr_un serv_addr;
    memset(&serv_addr, '0', sizeof(serv_addr));
    serv_addr.sun_family = AF_UNIX;
    strncpy(serv_addr.sun_path, UNIXPATH, strlen(UNIXPATH)+1);

    SYSCALL(notused, bind(listenfd, (struct sockaddr*)&serv_addr,sizeof(serv_addr)), "bind"); 
    SYSCALL(notused, listen(listenfd, MAXCONNECTION), "listen");

    long fd_c;
    int i=0; 

    fd_set set, rdset;

    FD_ZERO(&rdset);
    FD_ZERO (&set); 
    FD_SET(listenfd,&set); 

    int fd_num=listenfd; 

    struct timeval tv;
    tv.tv_sec=3;
    tv.tv_usec=0;

    while(1){
        if(terminate==1){
            break;
        }
        rdset=set; 
        if((notused=select(fd_num+1, &rdset, NULL, NULL, &tv))==-1){
            if(errno==EINTR){
                if(terminate==1){
                    break;
                }else{
                    perror("select");
                    exit(EXIT_FAILURE);
                }

            }

        }

        for(i=0; i<=fd_num;i++){
            if(FD_ISSET(i,&rdset)){
                if(i==listenfd){
                    SYSCALL(fd_c, accept(listenfd, (struct sockaddr*)NULL ,NULL), "accept");
                    printf("connection accepted\n");
                    FD_SET(fd_c, &set);
                    if(fd_c>fd_num) fd_num=fd_c;
                }else{
                    fd_c=i;
                }
                insert(fd_c);
                FD_CLR(fd_c,&set);
                if(fd_c==fd_num) fd_num=update(set,fd_num);

            }
        }
    }
    pthread_mutex_lock(&mtx);
    try=1;
    pthread_mutex_unlock(&mtx);
    close(listenfd);
    pthread_cond_broadcast(&empty);
    pthread_cond_signal(&full);
    // join on the thread and free(array)
    destroyArray(array);
    if(tmp)
        free(tmp);
    return 0;
}

Finally, this is the Valgrind output I sometimes get when I execute the code:

==7578== Memcheck, a memory error detector
==7578== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==7578== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==7578== Command: ./server
==7578== 
^Cthread: 100800256 terminated
thread: 109192960 terminated
thread: 117585664 terminated
thread: 125978368 terminated
thread: 134371072 terminated
thread: 142763776 terminated
thread: 151156480 terminated
thread: 159549184 terminated
==7578== 
==7578== HEAP SUMMARY:
==7578==     in use at exit: 1,638 bytes in 4 blocks
==7578==   total heap usage: 15 allocs, 11 frees, 4,958 bytes allocated
==7578== 
==7578== 36 bytes in 1 blocks are still reachable in loss record 1 of 4
==7578==    at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==7578==    by 0x401CF99: strdup (strdup.c:42)
==7578==    by 0x40187DE: _dl_load_cache_lookup (dl-cache.c:311)
==7578==    by 0x4009168: _dl_map_object (dl-load.c:2364)
==7578==    by 0x4015576: dl_open_worker (dl-open.c:237)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x4014DA8: _dl_open (dl-open.c:660)
==7578==    by 0x519A5AC: do_dlopen (dl-libc.c:87)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x519A663: dlerror_run (dl-libc.c:46)
==7578==    by 0x519A663: __libc_dlopen_mode (dl-libc.c:163)
==7578==    by 0x4E4B91A: pthread_cancel_init (unwind-forcedunwind.c:52)
==7578==    by 0x4E4BB03: _Unwind_ForcedUnwind (unwind-forcedunwind.c:126)
==7578== 
==7578== 36 bytes in 1 blocks are still reachable in loss record 2 of 4
==7578==    at 0x4C2DB8F: malloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==7578==    by 0x400BEF3: _dl_new_object (dl-object.c:165)
==7578==    by 0x400650C: _dl_map_object_from_fd (dl-load.c:1028)
==7578==    by 0x4008C26: _dl_map_object (dl-load.c:2498)
==7578==    by 0x4015576: dl_open_worker (dl-open.c:237)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x4014DA8: _dl_open (dl-open.c:660)
==7578==    by 0x519A5AC: do_dlopen (dl-libc.c:87)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x519A663: dlerror_run (dl-libc.c:46)
==7578==    by 0x519A663: __libc_dlopen_mode (dl-libc.c:163)
==7578==    by 0x4E4B91A: pthread_cancel_init (unwind-forcedunwind.c:52)
==7578==    by 0x4E4BB03: _Unwind_ForcedUnwind (unwind-forcedunwind.c:126)
==7578== 
==7578== 384 bytes in 1 blocks are still reachable in loss record 3 of 4
==7578==    at 0x4C2FB55: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==7578==    by 0x40120BD: _dl_check_map_versions (dl-version.c:293)
==7578==    by 0x4015B18: dl_open_worker (dl-open.c:286)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x4014DA8: _dl_open (dl-open.c:660)
==7578==    by 0x519A5AC: do_dlopen (dl-libc.c:87)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x519A663: dlerror_run (dl-libc.c:46)
==7578==    by 0x519A663: __libc_dlopen_mode (dl-libc.c:163)
==7578==    by 0x4E4B91A: pthread_cancel_init (unwind-forcedunwind.c:52)
==7578==    by 0x4E4BB03: _Unwind_ForcedUnwind (unwind-forcedunwind.c:126)
==7578==    by 0x4E4A06F: __pthread_unwind (unwind.c:121)
==7578==    by 0x4E42844: __do_cancel (pthreadP.h:283)
==7578==    by 0x4E42844: pthread_exit (pthread_exit.c:28)
==7578== 
==7578== 1,182 bytes in 1 blocks are still reachable in loss record 4 of 4
==7578==    at 0x4C2FB55: calloc (in /usr/lib/valgrind/vgpreload_memcheck-amd64-linux.so)
==7578==    by 0x400BBF5: _dl_new_object (dl-object.c:75)
==7578==    by 0x400650C: _dl_map_object_from_fd (dl-load.c:1028)
==7578==    by 0x4008C26: _dl_map_object (dl-load.c:2498)
==7578==    by 0x4015576: dl_open_worker (dl-open.c:237)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x4014DA8: _dl_open (dl-open.c:660)
==7578==    by 0x519A5AC: do_dlopen (dl-libc.c:87)
==7578==    by 0x4010563: _dl_catch_error (dl-error.c:187)
==7578==    by 0x519A663: dlerror_run (dl-libc.c:46)
==7578==    by 0x519A663: __libc_dlopen_mode (dl-libc.c:163)
==7578==    by 0x4E4B91A: pthread_cancel_init (unwind-forcedunwind.c:52)
==7578==    by 0x4E4BB03: _Unwind_ForcedUnwind (unwind-forcedunwind.c:126)
==7578== 
==7578== LEAK SUMMARY:
==7578==    definitely lost: 0 bytes in 0 blocks
==7578==    indirectly lost: 0 bytes in 0 blocks
==7578==      possibly lost: 0 bytes in 0 blocks
==7578==    still reachable: 1,638 bytes in 4 blocks
==7578==         suppressed: 0 bytes in 0 blocks
==7578== 
==7578== For counts of detected and suppressed errors, rerun with: -v
==7578== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)

New Valgrind output

==3920== Memcheck, a memory error detector
==3920== Copyright (C) 2002-2015, and GNU GPL'd, by Julian Seward et al.
==3920== Using Valgrind-3.11.0 and LibVEX; rerun with -h for copyright info
==3920== Command: ./server
==3920== 
^C
==3920== 
==3920== Process terminating with default action of signal 2 (SIGINT)
==3920==    at 0x4E3FE6D: __pthread_initialize_minimal (nptl-init.c:433)
==3920==    by 0x4E3F588: ??? (in /lib/x86_64-linux-gnu/libpthread-2.23.so)
==3920==    by 0x400044F: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so)
==3920==    by 0x4010679: call_init.part.0 (dl-init.c:58)
==3920==    by 0x4010834: call_init (dl-init.c:104)
==3920==    by 0x4010834: _dl_init (dl-init.c:87)
==3920==    by 0x4000C69: ??? (in /lib/x86_64-linux-gnu/ld-2.23.so)
==3920== 
==3920== HEAP SUMMARY:
==3920==     in use at exit: 0 bytes in 0 blocks
==3920==   total heap usage: 0 allocs, 0 frees, 0 bytes allocated
==3920== 
==3920== All heap blocks were freed -- no leaks are possible
==3920== 
==3920== For counts of detected and suppressed errors, rerun with: -v
==3920== ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
JayJona
  • 469
  • 1
  • 16
  • 41
  • 1
    Fyi, your thread proc logic is wrong. It checks the predicate without protection in the outer while loop (which as near as I can see, is pointless anyway unless `terminate` can take on more than the values `1` and `0`). Likewise, `terminate=1;` should not be done without protection either. Never **ever**, change, *nor check* predicate data managed by cvar+mtx without the mutex locked; *ever*. – WhozCraig Aug 30 '18 at 16:50
  • @WhozCraig do you mean that I should acquire the lock before the outer while? Because the lock itself is meant to protect the nelem variable and other things that I did not include in the code I posted. – JayJona Aug 30 '18 at 16:59
  • 2
    whatever else it protects, it at least *appears* that `terminate` is part your conditional predicate data. If that is the case, it cannot be used *anywhere* without mutual exclusion protection. Doing otherwise is the cardinal sin of pthread cvar+mtx programming. – WhozCraig Aug 30 '18 at 17:06
  • @WhozCraig I moved the check for terminate inside the mutual exclusion region, the code is now updated. The thing is, I can't use lock/unlock mechanisms in the signal handler since they are not signal safe as far as I know. – JayJona Aug 30 '18 at 17:08
  • yeah, you're totally right on that. ttbomk pthread mutexes are not async-safe for signals. I'l stare at this more once I finish my hideous commute (assuming someone else doesn't come by and proffer up a solution. Have an uptick. – WhozCraig Aug 30 '18 at 17:18
  • 2
    Wait, so if `select` is interrupted _and_ your handler already ran in this thread (`EINTR && terminate` branch), then you broadcast again (which seems redundant since the signal handler already did that). But the `EINTR && !terminate` branch - which probably means the signal handler ran in a different thread - just exits without cleanly joining the threads. Is that right? Is it what you intended? – Useless Aug 30 '18 at 17:27
  • 2
    Here's my advice. 1) The signal handler should set `terminate = 1` and nothing else. Don't call `write` and definitely don't call `pthread_cond_broadcast` from the signal handler. 2) The only function that should look at `terminate` is `main`. None of the thread functions should be monitoring `terminate`. 3) When `main` sees that `terminate` has been set, it should take care of shutting down the threads. – user3386109 Aug 30 '18 at 17:30
  • Do you need to do anything? That 'still reachable: 1,638 bytes in 4 blocks' - does it ever get any bigger than that, or does it stay the same., no matter how long you run your server/clients for? – Martin James Aug 30 '18 at 18:24
  • regarding: `void *threadF(){` this is not a valid pthread function signature. Suggest: `void *threadF( void *arg )` – user3629249 Aug 30 '18 at 18:46
  • @Useless when in the `EINTR && !terminate` branch, the code executes a break and it exits the while loop. Main proceeds and executes destroyarray that performs the join. @user3386109 I updated the code accordingly to your suggestion but the problem still remains @user3629249 I updated the thread code. – JayJona Sep 01 '18 at 10:23
  • No, that's the other branch. I was asking about the else. – Useless Sep 01 '18 at 13:33
  • @Useless Then yes, I exit without cleaning and joining the threads. In reality, it was a case that I didn’t notice since I was focusing on the other branch of the if statement, also because I only have EINTR errors from select. When the program run through valgrind terminates and shows the leak, it first terminates the threads ( I checked by printing the Id of the terminated threads ) and then only sometimes generates the leak. – JayJona Sep 02 '18 at 10:12
  • Wait, so do you know which exit path was taken? And there was logging showing exactly what happened? That should all be in the question rather than buried in a comment, it's relevant to understanding what is happening. – Useless Sep 03 '18 at 09:19
  • @Useless I'm sorry if I forgot to add something but I thought I had been clear. The program always terminates and always prints the Id of the terminated threads, but sometimes when run through valgrind it terminates without leaks, other times it shows the leak I show above. I have to solve it because I am required not to have any memory leaks by my university. – JayJona Sep 03 '18 at 09:38
  • You don't need to justify to me why you want to fix a bug, but please please update your question. Your code isn't minimal (if you definitely know one branch isn't taken, you can just remove it and leave a comment), and it isn't complete so I can't try to reproduce the issue, and you've _described_ some logging that should ideally be shown in the question. – Useless Sep 03 '18 at 09:57
  • @Useless updated code – JayJona Sep 04 '18 at 15:30
  • Do be mindful that it is not a problem for a process to fail to deallocate memory when it terminates. The OS cleans up. It is probably worthwhile to figure out what's going on here, so as to assure yourself that it's not a larger problem, but it's probably not worthwhile to take extreme measures for the sake of ensuring termination-time cleanup alone. – John Bollinger Sep 04 '18 at 15:53
  • since it is a project for a university exam this problem could make me fail the valgrind test – JayJona Sep 04 '18 at 15:58

1 Answers1

1

but sometimes it ends without memory leaks, others reporting some memory leaks that I leave behind.

That's probably because the program does not clean up its linked list of accepted connections before terminating.

But you have some other troubling issues in your code.

  • First, your use of the terminate variable is not thread safe. Do not be confused by the name of type sig_atomic_t: it is not an atomic datatype. It is safe for use by a signal handler to communicate with the thread that received the signal, but it is not safe for communicating between threads without use of an appropriate synchronization object, such as a mutex. Moreover, although necessary for use with a signal handler, making it volatile also fails to confer thread safety. This is a problem for your current code because when you send a SIGINT, it can be handled by any thread.

    I suggest resolving this by blocking SIGINT in all but the main thread. When a process receives a process-directed signal, it is queued for a thread that does not at that time have it blocked, so you should not need to worry about successfully receiving the signal. Every thread has its own signal mask, whose initial value is inherited from its parent thread. Therefore, I suggest blocking SIGINT in the main thread before launching all the workers, then unblocking it only in the main thread once the workers are all running.

  • Second, all accesses to mutable, non-atomic data shared between threads need to protected by an appropriate synchronization object. You have chosen a mutex for that purpose, but your program performs some accesses without its protection.

    • The shared data include at least variables try, nelem, and all the contents of the linked list headed by head.

    • threadF() checks try without holding the mutex

    • main() sets try without holding the mutex

    The behavior of your program is therefore undefined. In practice, it seems unlikely that this has anything to do with the "leak", but it might contribute to your program occasionally failing to shut down cleanly. In principle, anything might happen. In practice, the most likely effect is that your program occasionally fails to shut down cleanly.

John Bollinger
  • 160,171
  • 8
  • 81
  • 157
  • ok I updated the code again trying to do the free on the elements of the list, putting the lock on try and blocking SIGINT but the error sometimes happens again. Also now sometimes a new error appears on valgrind. Execution terminates by not generating leaks but the threads do not seem to terminate since the printf does not seem to be executed, and I do not know whether to consider Process terminating with default action of signal 2 (SIGINT) as an error. Maybe I did not understand how to block the signals. – JayJona Sep 05 '18 at 13:10
  • @james, although signal *masks* are per-thread properties, signal *dispositions* are per-process properties. Thus, if you ever get default `SIGINT` handling then it must be because the signal is delivered before you install your handler. Suggested procedure: at program startup, before launching any additional threads, use `pthread_sigmask()` to block `SIGINT` for the main thread. After starting all the child threads, use `pthread_sigmask()`, in the parent thread only, to unblock `SIGINT` in that thread. *In between*, be sure to have the main thread use `sigaction()` to install the handler. – John Bollinger Sep 05 '18 at 13:17
  • in the code I updated I think I did just that in the first part of the main – JayJona Sep 05 '18 at 15:10