0

My problem is similar to the post "Getting Segmentation Fault in C when using sleep with Pthreads", but I followed the advice there and I still get a stack overflow and a broken pipe error. I am modifying a simple web server to use a thread pool as the backend, and I only really modified two functions. The main:

/*
 * Program entry point.
 *
 * Usage: ./webserver PORT [#_of_threads] [crash_rate(%)]
 *
 * Parses the command line, initialises the two counting semaphores that
 * guard the request queue, starts `numThread` pool threads running
 * thread_function(), then starts the listener thread (req_handler) and
 * waits for it to finish.
 *
 * Returns 0 when the listener thread ends, EXIT_FAILURE on a usage or
 * start-up error.
 *
 * NOTE(review): the valgrind trace for this program shows it being killed
 * by SIGPIPE inside fwrite(). Nothing here changes the SIGPIPE disposition;
 * ignoring it (e.g. signal(SIGPIPE, SIG_IGN) with <signal.h>) should be done
 * before any thread is started.
 */
int main(int argc, char *argv[])
{
    pthread_t listener;
    int rc;
    int t;

    index = 0;

    if (argc < 2 || atoi(argv[1]) < 2000 || atoi(argv[1]) > 50000)
    {
        /* "%%" prints a literal '%'; a bare "%)" is an invalid conversion. */
        fprintf(stderr, "./webserver PORT(2001 ~ 49999) (#_of_threads) (crash_rate(%%))\n");
        return EXIT_FAILURE;
    }

    /* port number */
    port = atoi(argv[1]);

    /* number of worker threads; anything below 1 would leave no consumer */
    numThread = (argc > 2) ? atoi(argv[2]) : 1;
    if (numThread < 1)
        numThread = 1;
    /* NOTE(review): the capacity of thread_pool[] is not visible here;
     * numThread should also be capped to it - confirm against its definition. */

    /* crash rate, capped at 50% */
    if (argc > 3)
        CRASH = atoi(argv[3]);
    if (CRASH > 50)
        CRASH = 50;

    /* sem_empty counts free queue slots, sem_full counts queued requests */
    if (sem_init(&sem_empty, 0, MAX_REQUEST) != 0 ||
        sem_init(&sem_full, 0, 0) != 0)
    {
        perror("sem_init");
        return EXIT_FAILURE;
    }

    for (t = 0; t < numThread; t++)
    {
        rc = pthread_create(&thread_pool[t], NULL, thread_function, NULL);
        if (rc != 0)
        {
            fprintf(stderr, "pthread_create (worker %d) failed: error %d\n", t, rc);
            return EXIT_FAILURE;
        }
    }

    printf("[pid %d] CRASH RATE = %d%%\n", (int)getpid(), CRASH);

    rc = pthread_create(&listener, NULL, req_handler, NULL);
    if (rc != 0)
    {
        fprintf(stderr, "pthread_create (listener) failed: error %d\n", rc);
        return EXIT_FAILURE;
    }

    pthread_join(listener, NULL);
    return 0;
}

This uses pthread_create to start my listener, which also contains socket code I didn't touch:

/*
 * Listener thread: creates the listening TCP socket on `port`, then accepts
 * connections forever and pushes each accepted descriptor into the shared
 * `request` ring buffer for the pool threads to consume.
 *
 * Queue protocol: wait on sem_empty (a free slot), store the descriptor at
 * the producer index, then post sem_full (one more queued request).
 *
 * arg is unused. Returns NULL when accept() fails; exits the process if the
 * socket cannot be created, bound, or put into listening state.
 */
void * req_handler(void *arg)
{
        struct sockaddr_in sin = {0};   /* zeroed so padding/unused fields are defined */
        int queClock = 0;               /* producer index into request[] */
        int r;

        (void)arg;

        sock = socket(AF_INET, SOCK_STREAM, 0);
        if (sock < 0) {
                perror("Error creating socket:");
                exit(EXIT_FAILURE);
        }

        sin.sin_family = AF_INET;
        sin.sin_addr.s_addr = INADDR_ANY;
        sin.sin_port = htons(port);
        r = bind(sock, (struct sockaddr *) &sin, sizeof(sin));
        if (r < 0) {
                perror("Error binding socket:");
                exit(EXIT_FAILURE);
        }

        r = listen(sock, 10);
        if (r < 0) {
                perror("Error listening socket:");
                exit(EXIT_FAILURE);
        }

        printf("HTTP server listening on port %d\n", port);

        while (1)
        {
                int s = accept(sock, NULL, NULL);
                if (s < 0) {
                        printf("sock error\n");
                        break;
                }

                /* Block until the ring has a free slot. */
                sem_wait(&sem_empty);
                printf("listener in mutex\n");
                request[queClock] = s;
                queClock++;
                printf("listener exit mutex\n counter: %d", queClock);
                /* NOTE(review): the wrap point is hard-coded to 100; this is only
                 * safe if request[] has at least 100 entries and MAX_REQUEST <= 100
                 * - confirm against their definitions. */
                if (queClock == 100)
                        queClock = 0;
                /* Publish the request to the pool threads. */
                sem_post(&sem_full);
        }

        close(sock);
        return NULL;
}

And here is the thread function that each of my thread pool threads runs:

/*
 * Pool thread body: repeatedly takes one accepted socket descriptor out of
 * the shared `request` ring buffer and serves it with process().
 *
 * Queue protocol: wait on sem_full (a queued request), read and advance the
 * shared consumer `index` under `mutex`, then post sem_empty.
 *
 * The slot is released (sem_post on sem_empty) as soon as the descriptor has
 * been copied out, not after process() returns: process() may terminate the
 * calling thread with pthread_exit(), and a post placed after it would then
 * never run, leaking one queue slot per terminated thread.
 *
 * arg is unused. The loop never ends on its own.
 */
void * thread_function(void *arg){
    pid_t x = syscall(__NR_gettid);

    (void)arg;

    while(1){
        int temp;
        int next;   /* snapshot of index taken under the mutex, for logging */

        sem_wait(&sem_full);
        pthread_mutex_lock(&mutex);
        printf("pool thread %d in mutex\n", (int)x);
        temp = request[index];
        index++;
        if(index == 100) index = 0;
        next = index;
        pthread_mutex_unlock(&mutex);

        /* The descriptor now lives in `temp`; hand the slot back right away. */
        sem_post(&sem_empty);

        printf("pool thread %d after queue\n temp: %d index: %d\n", (int)x, temp, next);

        process(temp);

        printf("pool thread %d after process call\n", (int)x);

        /* NOTE(review): requestCount is shared and written here by every pool
         * thread without synchronisation; it is only used for this log line. */
        sem_getvalue(&sem_empty, &requestCount);
        printf("sem empty value: %d\n",requestCount);
    }

    return NULL;
}

I run my server in the background, and after running multiple groups of 100 client requests I get this error, normally on the 4th or 5th set of requests. The code always appears to crash on the line of a usleep call, in a file I didn't modify, which my thread enters through the process function call in my thread function:

/*
 * Serves one HTTP request on the connected socket `fd`.
 *
 * With probability CRASH percent the calling thread is terminated first
 * (fd is closed, then pthread_exit) to simulate a worker crash. Otherwise
 * the request line is read and one of the following is sent:
 *   - 501 for any method other than GET,
 *   - 414/500 if the filesystem path cannot be built,
 *   - 404 if the path does not exist,
 *   - 302 for a directory requested without a trailing slash,
 *   - the directory's index.html, or a generated listing,
 *   - the file itself.
 *
 * The descriptor is always closed before returning (through fclose on the
 * stream wrapping it, or close() if the stream could not be created).
 *
 * Returns 0 once a reply has been sent, -1 if the stream could not be
 * opened or the request line was missing or malformed.
 *
 * This function runs concurrently on several threads, so it uses the
 * re-entrant strtok_r()/gmtime_r() instead of strtok()/gmtime(), which keep
 * hidden static state.
 *
 * NOTE(review): the request path is appended to the working directory
 * without normalisation, so a path containing ".." can escape it; the raw
 * path is also echoed into HTML unescaped. Both need a policy decision.
 */
int process(int fd) {
        char buf[4096];
        char *method;
        char *_path;
        char *protocol;
        char *save = NULL;              /* strtok_r parsing state */
        char path[4096];
        char pathbuf[4096];
        char cwd[1024];
        struct stat statbuf;
        struct sockaddr_in peer;
        socklen_t peer_len = sizeof(peer);
        size_t len;
        int have_cwd;
        int n = -1;
        FILE *f;

        srand(syscall(__NR_gettid) + time(NULL));
        if(CRASH > 0 && rand() % 100 < CRASH) {
                printf("Thread [pid %d, tid %d] terminated!\n", getpid(), gettid());
                close(fd);
                pthread_exit(NULL);
        }

        f = fdopen(fd, "a+");
        printf("after fdopen\n");
        usleep(100000);
        printf("before get peer name\n");
        if(getpeername(fd, (struct sockaddr*) &peer, &peer_len) != -1) {
                printf("[pid %d, tid %d] Received a request from %s:%d\n", getpid(), gettid(), inet_ntoa(peer.sin_addr), (int)ntohs(peer.sin_port));
        }
        printf("after get peer name\n");
        if(f == NULL) {
                /* fd is an int: print it with %d, and do not leak it. */
                printf("fileopen error: %d\n", fd);
                close(fd);
                return -1;
        }

        if (!fgets(buf, sizeof(buf), f)) {
                fclose(f);
                return -1;
        }

        peer_len = sizeof(peer);        /* value-result argument: reset before reuse */
        if(getpeername(fileno(f), (struct sockaddr*) &peer, &peer_len) != -1) {
                printf("[pid %d, tid %d] (from %s:%d) URL: %s", getpid(), gettid(),inet_ntoa(peer.sin_addr), (int)ntohs(peer.sin_port), buf);
        } else {
                printf("[pid %d, tid %d] URL: %s", getpid(), gettid(), buf);
        }

        method = strtok_r(buf, " ", &save);
        _path = strtok_r(NULL, " ", &save);
        protocol = strtok_r(NULL, "\r", &save);
        if (!method || !_path || !protocol) {
                fclose(f);
                return -1;
        }

        /* Build the filesystem path with a bounded copy; n stays -1 on failure. */
        have_cwd = (getcwd(cwd, sizeof(cwd)) != NULL);
        if (have_cwd)
                n = snprintf(path, sizeof(path), "%s%s", cwd, _path);

        fseek(f, 0, SEEK_CUR); // Force change of stream direction

        if (strcasecmp(method, "GET") != 0) {
                send_error(f, 501, "Not supported", NULL, "Method is not supported.");
                printf("[pid %d, tid %d] Reply: %s", getpid(), gettid(), "Method is not supported.\n");
        } else if (!have_cwd) {
                send_error(f, 500, "Internal Server Error", NULL, "Cannot determine working directory.");
                printf("[pid %d, tid %d] Reply: %s", getpid(), gettid(), "Cannot determine working directory.\n");
        } else if (n < 0 || (size_t)n >= sizeof(path)) {
                send_error(f, 414, "Request-URI Too Long", NULL, "Requested path is too long.");
                printf("[pid %d, tid %d] Reply: %s", getpid(), gettid(), "Requested path is too long.\n");
        } else if (stat(path, &statbuf) < 0) {
                send_error(f, 404, "Not Found", NULL, "File not found.");
                printf("[pid %d, tid %d] Reply: File not found - %s", getpid(), gettid(), path);
        } else if (S_ISDIR(statbuf.st_mode)) {
                len = strlen(path);
                if (len == 0 || path[len - 1] != '/') {
                        /* Redirect to the URL path, not the filesystem path. */
                        snprintf(pathbuf, sizeof(pathbuf), "Location: %s/", _path);
                        send_error(f, 302, "Found", pathbuf, "Directories must end with a slash.");
                        printf("[pid %d, tid %d] Reply: %s", getpid(), gettid(), "Directories mush end with a slash.\n");
                } else {
                        /* `path` already starts with cwd; do not prepend it again. */
                        n = snprintf(pathbuf, sizeof(pathbuf), "%sindex.html", path);
                        if (n >= 0 && (size_t)n < sizeof(pathbuf) && stat(pathbuf, &statbuf) >= 0) {
                                send_file(f, pathbuf, &statbuf);
                                printf("[pid %d, tid %d] Reply: filesend %s\n", getpid(), gettid(), pathbuf);
                        } else {
                                /* Open the directory before committing to a 200 reply. */
                                DIR *dir = opendir(path);
                                struct dirent *de;

                                if (dir == NULL) {
                                        send_error(f, 403, "Forbidden", NULL, "Access denied.");
                                        printf("[pid %d, tid %d] Reply: %s", getpid(), gettid(), "Access denied.\n");
                                } else {
                                        send_headers(f, 200, "OK", NULL, "text/html", -1, statbuf.st_mtime);
                                        fprintf(f, "<HTML><HEAD><TITLE>Index of %s</TITLE></HEAD>\r\n<BODY>", path);
                                        fprintf(f, "<H4>Index of %s</H4>\r\n<PRE>\n", path);
                                        fprintf(f, "Name                             Last Modified              Size\r\n");
                                        fprintf(f, "<HR>\r\n");
                                        if (len > 1) fprintf(f, "<A HREF=\"..\">..</A>\r\n");

                                        while ((de = readdir(dir)) != NULL) {
                                                char timebuf[32];
                                                struct stat entry;
                                                struct tm tm_buf;
                                                size_t name_len = strlen(de->d_name);
                                                int is_dir;

                                                /* Skip entries whose full path does not fit or
                                                 * that cannot be stat'ed, rather than printing
                                                 * data from an unrelated stat result. */
                                                n = snprintf(pathbuf, sizeof(pathbuf), "%s%s", path, de->d_name);
                                                if (n < 0 || (size_t)n >= sizeof(pathbuf))
                                                        continue;
                                                if (stat(pathbuf, &entry) < 0)
                                                        continue;

                                                is_dir = S_ISDIR(entry.st_mode);
                                                if (gmtime_r(&entry.st_mtime, &tm_buf) == NULL ||
                                                    strftime(timebuf, sizeof(timebuf), "%d-%b-%Y %H:%M:%S", &tm_buf) == 0)
                                                        timebuf[0] = '\0';

                                                fprintf(f, "<A HREF=\"%s%s\">", de->d_name, is_dir ? "/" : "");
                                                fprintf(f, "%s%s", de->d_name, is_dir ? "/</A>" : "</A> ");
                                                /* "%*s" takes an int field width, not a size_t. */
                                                if (name_len < 32) fprintf(f, "%*s", (int)(32 - name_len), "");
                                                if (is_dir) {
                                                        fprintf(f, "%s\r\n", timebuf);
                                                } else {
                                                        fprintf(f, "%s %10lld\r\n", timebuf, (long long)entry.st_size);
                                                }
                                        }
                                        closedir(dir);

                                        fprintf(f, "</PRE>\r\n<HR>\r\n<ADDRESS>%s</ADDRESS>\r\n</BODY></HTML>\r\n", SERVER);
                                        printf("[pid %d, tid %d] Reply: SUCCEED\n", getpid(), gettid());
                                }
                        }
                }
        } else {
                send_file(f, path, &statbuf);
                printf("[pid %d, tid %d] Reply: filesend %s\n", getpid(), gettid(), path);
        }

        fclose(f);
        return 0;
}

This then calls another function, send_file, whose fwrite call causes the broken pipe error:

/*
 * Streams the file at `path` to the client stream `f`.
 *
 * f       - output stream wrapping the client socket
 * path    - filesystem path of the file to send
 * statbuf - stat result for `path`; supplies the size and the modification
 *           time passed to send_headers()
 *
 * Sends a 403 reply if the file cannot be opened. Otherwise sends the
 * response headers followed by the file contents in 4096-byte chunks, and
 * stops as soon as a write to the client fails, since later writes to the
 * same stream cannot succeed either.
 *
 * NOTE(review): with the default SIGPIPE disposition a write to a closed
 * connection terminates the whole process inside fwrite() (this is what the
 * reported valgrind trace shows), so the short-write check below only takes
 * effect once SIGPIPE is ignored or blocked by the program.
 */
void send_file(FILE *f, char *path, struct stat *statbuf) {
        char data[4096];
        size_t n;
        int length;
        FILE *file = fopen(path, "r");

        if (!file) {
                send_error(f, 403, "Forbidden", NULL, "Access denied.");
                return;
        }

        /* NOTE(review): st_size is an off_t narrowed to int here, as in the
         * original; files larger than INT_MAX would report a wrong length. */
        length = S_ISREG(statbuf->st_mode) ? (int)statbuf->st_size : -1;
        send_headers(f, 200, "OK", NULL, get_mime_type(path), length, statbuf->st_mtime);

        while ((n = fread(data, 1, sizeof(data), file)) > 0) {
                if (fwrite(data, 1, n, f) != n) {
                        /* Client went away or another write error occurred. */
                        break;
                }
        }
        fclose(file);
}

Is the fact that I control my producer thread with only a semaphore a problem?

/*
 * Shared declarations for the web server: the debug() logging macro, the
 * crash-rate setting, and the request-processing entry points.
 */
#ifndef __WEBSERVER
#define __WEBSERVER

/* Defining NDEBUG here compiles every debug() call out. */
#define NDEBUG

/*
 * debug(fmt, ...): printf-style diagnostic written to stderr, prefixed with
 * the source file and line; expands to nothing when NDEBUG is defined.
 * NOTE(review): only the tail of the non-NDEBUG definition
 * ("__LINE__, ##__VA_ARGS__)") survived in the source; the first part below
 * is a reconstruction - confirm against the original header.
 */
#ifdef NDEBUG
#define debug(M, ...)
#else
#define debug(M, ...) fprintf(stderr, "DEBUG %s:%d: " M "\n", __FILE__, __LINE__, ##__VA_ARGS__)
#endif

/* Crash rate in percent, read by process(). */
extern int CRASH;

/* Serves one HTTP request on the connected socket fd. */
int process(int fd);

/* Returns the calling thread's id (used in log messages). */
int gettid(void);

#endif

Header file added for extra information.

I am pretty sure I'm not passing in anything uninitialized, because it works most of the time. I have tried changing almost everything and am about to just start over at this point. If anyone could point me in the right direction on what I'm doing wrong, or give me tips on using valgrind on an infinitely looping backgrounded process, it would be a big help. The crash always happens about 1/3 to 1/2 of the way through a batch of requests, so I don't think it's like "Socket: send() function returned 'Broken Pipe' error".

==29486==
==29486== Process terminating with default action of signal 13 (SIGPIPE)
==29486==    at 0x5142BBD: ??? (in /usr/lib64/libc-2.17.so)
==29486==    by 0x50CD2F2: _IO_file_write@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==29486==    by 0x50CEB0D: _IO_do_write@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==29486==    by 0x50CDA4F: _IO_file_xsputn@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==29486==    by 0x50C27E1: fwrite (in /usr/lib64/libc-2.17.so)
==29486==    by 0x401E4B: send_file (net.c:78)
==29486==    by 0x402732: process (net.c:197)
==29486==    by 0x401589: thread_function (webserver.c:37)
==29486==    by 0x4E3EEA4: start_thread (in /usr/lib64/libpthread-2.17.so)
==29486==    by 0x5151B0C: clone (in /usr/lib64/libc-2.17.so)
[pid 29486, tid 65] Received a request from 127.0.0.1:35764
==29486==
==29486== HEAP SUMMARY:
==29486==     in use at exit: 12,408 bytes in 22 blocks
==29486==   total heap usage: 170 allocs, 148 frees, 94,882 bytes allocated
==29486==
==29486== 560 bytes in 1 blocks are possibly lost in loss record 1 of 4
==29486==    at 0x4C2C089: calloc (vg_replace_malloc.c:762)
==29486==    by 0x4012784: _dl_allocate_tls (in /usr/lib64/ld-2.17.so)
==29486==    by 0x4E3F87B: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.17.so)
==29486==    by 0x4018C0: main (webserver.c:146)
==29486==
==29486== 5,600 bytes in 10 blocks are possibly lost in loss record 3 of 4
==29486==    at 0x4C2C089: calloc (vg_replace_malloc.c:762)
==29486==    by 0x4012784: _dl_allocate_tls (in /usr/lib64/ld-2.17.so)
==29486==    by 0x4E3F87B: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.17.so)
==29486==    by 0x401878: main (webserver.c:143)
==29486==
==29486== LEAK SUMMARY:
==29486==    definitely lost: 0 bytes in 0 blocks
==29486==    indirectly lost: 0 bytes in 0 blocks
==29486==      possibly lost: 6,160 bytes in 11 blocks
==29486==    still reachable: 6,248 bytes in 11 blocks
==29486==         suppressed: 0 bytes in 0 blocks
==29486== Reachable blocks (those to which a pointer was found) are not shown.
==29486== To see them, rerun with: --leak-check=full --show-leak-kinds=all
==29486==
==29486== For lists of detected and suppressed errors, rerun with: -s
==29486== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)

Segmentation fault (core dumped)
[mzy22580@csci-odin project2]$  counter: 55before get peer name
[pid 12112, tid 27] Received a request from 127.0.0.1:36512
after get peer name
[pid 12112, tid 27] (from 127.0.0.1:36512) URL: GET /index.html HTTP/1.0
==12112==
==12112== Process terminating with default action of signal 13 (SIGPIPE)
==12112==    at 0x5142BBD: ??? (in /usr/lib64/libc-2.17.so)
==12112==    by 0x50CD2F2: _IO_file_write@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==12112==    by 0x50CEB0D: _IO_do_write@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==12112==    by 0x50CDA4F: _IO_file_xsputn@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==12112==    by 0x50C27E1: fwrite (in /usr/lib64/libc-2.17.so)
==12112==    by 0x401E4B: send_file (net.c:78)
==12112==    by 0x402732: process (net.c:197)
==12112==    by 0x401589: thread_function (webserver.c:37)
==12112==    by 0x4E3EEA4: start_thread (in /usr/lib64/libpthread-2.17.so)
==12112==    by 0x5151B0C: clone (in /usr/lib64/libc-2.17.so)
==12112==
==12112== HEAP SUMMARY:
==12112==     in use at exit: 12,408 bytes in 22 blocks
==12112==   total heap usage: 298 allocs, 276 frees, 167,586 bytes allocated
==12112==
==12112== 560 bytes in 1 blocks are possibly lost in loss record 1 of 4
==12112==    at 0x4C2C089: calloc (vg_replace_malloc.c:762)
==12112==    by 0x4012784: _dl_allocate_tls (in /usr/lib64/ld-2.17.so)
==12112==    by 0x4E3F87B: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.17.so)
==12112==    by 0x4018C0: main (webserver.c:146)
==12112==
==12112== 568 bytes in 1 blocks are still reachable in loss record 2 of 4
==12112==    at 0x4C29F73: malloc (vg_replace_malloc.c:309)
==12112==    by 0x50C1C4C: __fopen_internal (in /usr/lib64/libc-2.17.so)
==12112==    by 0x401D81: send_file (net.c:71)
==12112==    by 0x402732: process (net.c:197)
==12112==    by 0x401589: thread_function (webserver.c:37)
==12112==    by 0x4E3EEA4: start_thread (in /usr/lib64/libpthread-2.17.so)
==12112==    by 0x5151B0C: clone (in /usr/lib64/libc-2.17.so)
==12112==
==12112== 5,600 bytes in 10 blocks are possibly lost in loss record 3 of 4
==12112==    at 0x4C2C089: calloc (vg_replace_malloc.c:762)
==12112==    by 0x4012784: _dl_allocate_tls (in /usr/lib64/ld-2.17.so)
==12112==    by 0x4E3F87B: pthread_create@@GLIBC_2.2.5 (in /usr/lib64/libpthread-2.17.so)
==12112==    by 0x401878: main (webserver.c:143)
==12112==
==12112== 5,680 bytes in 10 blocks are still reachable in loss record 4 of 4
==12112==    at 0x4C29F73: malloc (vg_replace_malloc.c:309)
==12112==    by 0x50C14C4: fdopen@@GLIBC_2.2.5 (in /usr/lib64/libc-2.17.so)
==12112==    by 0x401F6E: process (net.c:104)
==12112==    by 0x401589: thread_function (webserver.c:37)
==12112==    by 0x4E3EEA4: start_thread (in /usr/lib64/libpthread-2.17.so)
==12112==    by 0x5151B0C: clone (in /usr/lib64/libc-2.17.so)
==12112==
==12112== LEAK SUMMARY:
==12112==    definitely lost: 0 bytes in 0 blocks
==12112==    indirectly lost: 0 bytes in 0 blocks
==12112==      possibly lost: 6,160 bytes in 11 blocks
==12112==    still reachable: 6,248 bytes in 11 blocks
==12112==         suppressed: 0 bytes in 0 blocks
==12112==
==12112== ERROR SUMMARY: 2 errors from 2 contexts (suppressed: 0 from 0)
[mzy22580@csci-odin project2]$
user16217248
  • 3,119
  • 19
  • 19
  • 37
  • 1
    Please post code, errors, sample data or textual output here as plain-text, not as images that can be hard to read, can’t be copy-pasted to help test code or use in answers, and are barrier to those who depend on screen readers or translation tools. You can edit your question to add the code in the body of your question. For easy formatting use the `{}` button to mark blocks of code, or indent with four spaces for the same effect. The contents of a **screenshot can’t be searched, run as code, or easily copied and edited to create a solution.** – tadman Mar 03 '23 at 03:46
  • I couldn't copy paste the terminal but I did edit the code – Zachary Young Mar 03 '23 at 04:12
  • How do you get to think _the code allways crashes on the line of a usleep call_? – Armali Mar 03 '23 at 07:44
  • 1
    Your thread callbacks invoke undefined behavior by not having a `return 0;` at the end of the function. – Lundin Mar 03 '23 at 11:18
  • 1
    @Armali Originally I thought it was because of the last debugging print statement that showed in the terminal, but I think it was probably a different thread that caused the crash at net.c line 78 – Zachary Young Mar 03 '23 at 18:01
  • How is `MAX_REQUEST` defined? – Armali Mar 03 '23 at 18:32
  • Not likely related to your error, but it is pointless to start a single thread and then immediately join it, as you do with the `req_handler()` thread. Calling the thread function directly is more efficient, and has almost identical semantics (the only difference I can think of is the effect of a call to `pthread_exit()` within the scope of the erstwhile thread function). – John Bollinger Mar 07 '23 at 14:38

1 Answers1

0

The issue does not seem to be directly related to the threading or the use of semaphores and mutexes.

The code is fairly sound, but it does have a few weaknesses that make it less robust than it could be. Some of them are in the area of error handling, and that's where the server is in fact falling over. The stack trace associated with your SIGPIPE shows that it is triggered by the fwrite() call in your send_file() function. Since you're writing to a socket, this should be interpreted as indicating that the connection to the remote peer was closed before or during the execution of fwrite().

This is, yes, exactly what "Socket: send() function returned 'Broken Pipe' error" describes. At this point, I see no reason why its happening partway through a batch of requests should contraindicate that explanation.

It's unclear why this is happening in your particular case,* but it is a possibility that no web server can safely ignore. See How to prevent SIGPIPEs (or handle them properly) for information on preventing a SIGPIPE from killing your program, but note also that once the remote peer closes the socket, there's no point in continuing to try to send data to it. Each subsequent fwrite() should be expected to fail, but the code does not look for or handle that (nor write failures occurring for other reasons).


*Speculation: the client requests are subject to a timeout, and occasionally there is enough time between writes that it elapses, with the result that the client aborts the request on its side. Making the server multithreaded increases the likelihood of such a failure happening while the server is handling a request, as opposed to before it commences. A client-side abort before request handling starts could manifest differently.

John Bollinger
  • 160,171
  • 8
  • 81
  • 157