1

I wrote simple application in C which forks one child to act as a network server, and many children to act as a network clients. Clients connect to the server, and ask for data. Here is the code:

#include <stdio.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/queue.h>
#include <strings.h>
#include <sys/wait.h>


#define LISTENSOCKET 1519
#define MAXCLIENT 200
#define MAXLINE 80


void client_do_something(int c) 
{
    printf("Process %d, server gave %d\n", getpid(), c);
}


void client_body()
{
    struct sockaddr_in clientaddr;
    struct sockaddr_in localaddr;
    int sockfd;
    int nread;
    int s;
    char serveraddr[20] = "127.0.0.1";
    char laddr[20];
    char command[4] = "GET";
    int c;
    command[3] = '\0';
    socklen_t len;

    sockfd = socket(AF_INET, SOCK_STREAM, 0);
    clientaddr.sin_family = AF_INET;
    clientaddr.sin_port = htons(LISTENSOCKET);
    inet_pton(AF_INET, serveraddr, &clientaddr.sin_addr);
    printf("Client %d started\n", getpid());
    if ((s=connect(sockfd, (struct sockaddr *) &clientaddr, sizeof(clientaddr))) != 0) {
        perror("conn err:");
        printf("Connect error: pid %d %d\n", getpid(), errno);
        close(sockfd);
        sleep(1);
        exit(1);
    }
    getsockname(sockfd, (struct sockaddr *) &localaddr, &len);
    inet_ntop(AF_INET,  &localaddr.sin_addr, laddr, sizeof(laddr));
    printf("Client %d passed connect (%d), %s:%d \n", getpid(), s, laddr,     ntohs(localaddr.sin_port));
    while(1) {
        send(sockfd, command, 4, 0);
        if ( (nread = recv(sockfd, &c, 4, 0)) < 0 ) {
            if (errno == ENOTCONN) {
                sleep(1);
                continue;
            }
            perror("client recv err:");
            printf("Client %d received error %d ", getpid(), errno);
            exit(1);
        } else if (nread == 0) {
            printf("Pid %d received FIN\n", getpid());
            close(sockfd);
            exit(0);
        }
        client_do_something(c);
    }
}


int start_server() {
    int i, nread, maxi, val, listenfd, connfd, sockfd, maxfd, nready, client[MAXCLIENT];
    socklen_t len;
    char *c;
    struct sockaddr_in servaddr, clientaddr, localaddr;
    fd_set rset, allset;
    char addr[MAXLINE];
    char laddr[INET_ADDRSTRLEN];
    struct timeval timeout;
    printf("Started\n");
    printf("Server PID=%d", getpid());
    val = 0;

    bzero(&servaddr, sizeof(servaddr));
    servaddr.sin_family = AF_INET;
    servaddr.sin_port = htons(LISTENSOCKET);
    servaddr.sin_addr.s_addr = htonl(INADDR_ANY);

    if((listenfd = socket(AF_INET, SOCK_STREAM, 0)) < 0)
        perror("socket failed\n");


    if(bind(listenfd, (struct sockaddr *) &servaddr, (socklen_t) sizeof(servaddr)) == -1) 
        perror("listen failed\n");
    getsockname(listenfd, (struct sockaddr *) &localaddr, &len);
    inet_ntop(AF_INET,  &localaddr.sin_addr, laddr, INET_ADDRSTRLEN);
    printf("Server %d passed connect , %s:%d \n", getpid(),  laddr, ntohs(localaddr.sin_port));
    if(listen(listenfd, 10) == -1) 
        perror("listen failed\n");

    maxi = -1;
    for(i=0; i < MAXCLIENT; i++)
    client[i] = -1;
    maxfd = listenfd;
    FD_ZERO(&allset);
    FD_SET(listenfd, &allset);

    bzero(&timeout, sizeof(struct timeval));
    timeout.tv_sec = 15;
    printf("Server process, after listen, sleep 5s before accept\n");
/* Here I purpousely sleep because I want client to initiate connection before accept */
    sleep(5);
    printf("Server porcess, slept for 5s\n");
    while(1) {
    rset = allset;
    nready = select(maxfd + 1, &rset, NULL, NULL, NULL);
    if (FD_ISSET(listenfd, &rset)) {
        len = sizeof(clientaddr);
        bzero(&clientaddr, sizeof(clientaddr));
        if((connfd = accept(listenfd, (struct sockaddr *) &clientaddr, &len)) < 0) {
        perror("accept failed");
        exit(-1);
        }
        for(i=0; i < MAXCLIENT; i++) {
        if(client[i] < 0) {
            client[i] = connfd;
            break;
        }
        } 
        FD_SET(connfd, &allset);
        if(connfd >= maxfd)
        maxfd = connfd;
        if(i > maxi)
        maxi = i;
        if(--nready <= 0) 
        continue;
    }
    for(i=0; i <= maxi; i++) {
        if ( (sockfd = client[i]) < 0)
        continue;
        if(FD_ISSET(sockfd, &rset)) {
        if( (nread = read(sockfd, addr, MAXLINE)) < 0) {
            if(errno == EINTR) 
            nread = 0;
            else {
            printf("Izlazim");
            return -1;
            }
        } else if (nread == 0) {
            close(sockfd);
            FD_CLR(sockfd, &allset);
            client[i] = -1;
                    continue;
        }

        if (strncmp(addr, "GET", 3) == 0) {
                    if(val < 100) {
                        i = send(sockfd, (int *) &val, (size_t) sizeof(val), 0);
                        val++;
                        if (i == -1)
                            printf("nread=%d, errno=%d\n",nread, errno); 
                    } else {
                        FD_CLR(sockfd, &allset);
                        client[i] = -1;
                        close(sockfd);
                    }
            } else {
            c = addr;
                    printf("Poslao je addr=%s\n", addr);
            }
            if(--nready <= 0) 
            break;
        }
    }
    }
}


int main(int argc, char *argv[]) {
    printf("Pid=%d",getpid());
    int i, child_num, status;
    pid_t p, pp;
    child_num = 100;
    printf("%d childs, pid=%d \n", child_num, getpid());
    p = fork();
    if(p == 0) {
        start_server();
        exit(0);
    } else if (p < 0) {
        exit(1);
    }
    pp = p;
    for(i = 0; i < child_num; i++) {
        p = fork();
        if (p < 0) {
            exit(1);
        } else if (p == 0){
            client_body();
            exit(0);
        }
    }
    for(i = 0; i < child_num; i++) {
        wait(&status);
    }
    kill(pp, SIGTERM);
    return 0;
}

The problem is in connect() system call (in client_body function); according to the man pages, connect should return 0 if connection succeeds, and -1 on error. I have noticed that, in my program, connect() returned 0 although it didn't establish connection with the server (received SYN ACK from the server). Later, in the program, the same child process issued recv system call, which produced error with errno value 104 (ECONNRESET). All the time I watched the wireshark capture, and I didn't notice RESET flag in any TCP packet sent from the server.

Anybody have any idea what's wrong? I have tested this code on linux, 2.6.38-8-generic kernel.

  • I tried this code on freebsd 8.1 system, and didn't reproduce issue that connect() returns 0 (no error) without exchanging 3-way handshake. – user1267132 Nov 04 '12 at 11:21

2 Answers2

1

You might need to insert a delay in main() to ensure the server is started before the client try to connect.

sleep(1); // should do it between the 2 forks

A connection can be reset at anytime.

Your fork() rate is probably too fast for the listen() backlog. Increase from 10 to SOMAXCONN. Insert a nanosleep() subsecond sleep between each client trying to connect, maybe 10ms would do it. When you exceed this the client will see ECONNRESET.

No sure why you have sleep(1) before exit() from client. Look into setsocketopt() linger settings online for what I think you are trying to achieve and exit immediately. Some links: https://lists.mindrot.org/pipermail/openssh-unix-dev/2002-September/015275.html (contains C code of SO_LINGER use) What is the difference between calling setSoLinger with a 0 value and not enabling soLinger at all? (Java orientated but related to same mechanism)

There is no need to sleep(5) on the server. you can listen right away. This 5 second delay will compund the listen() backlog set to 10 for seeing ECONNRESET() during connect() as the server side is overloaded with incoming connections.

As an improvement most errors on recv() are reasons to terminate the connection. Except for EWOULDBLOCK/EAGAIN and EINTR. i.e. ENOTCONN is an error that should terminate use of socket (if you were UDP there would be a case to continue to keep the fd open, but not with TCP the socket will never recover is you see that). These are the only 2 cases

if(recv(fd, ...) < 0) {
  if(errno != EWOULDBLOCK && errno != EINTR) {
    // print out
    close(fd);
    exit(1);
  }
}

FWIW I know I have not answered your question directly, but I don't believe the claim you make based on the code. Maybe if you were to re-organise all log output so the getpid() is at the start of the line formatted %05d then run the program then pastebin the resulting log file as proof of the order that things are happening.

Community
  • 1
  • 1
Darryl Miles
  • 4,576
  • 1
  • 21
  • 20
  • I don't know what you think he is trying to achieve with the sleep before exit that messing with SO_LINGER will achieve, but it won't. It only affects the timing of the return from the close() method, which he has already called. The C link you posted just contains some code that doesn't do what it claims to do in its comments, and should indeed have been removed. – user207421 Nov 03 '12 at 21:31
  • Heh... 1) we don't know what is trying to be achieved, 2) you'd obviously rework the code around the sleep(1) in light of learning about the SO_LINGER and what it does. If it were me I'd use neither sleep() nor SO_LINGER and simply shutdown(fd, SHUT_RDWR) before closing the socket. The goal of my answer is to get him to read up on it and then think how it affects his app, not to do all the work for him. Which link? I see it as relevant on how to use the setsockopt() API to enable SO_LINGER. Obviously the user should also be using google. That C snipped provides example and terms. – Darryl Miles Nov 03 '12 at 22:06
  • I wouldn't do anything except close the socket. shutdown() is redundant immediately before a close(). The C snippet provides comments that say it is addressing the TIME_WAIT 'issue' when it doesn't, and provides no clarification on what SO_LINGER is actually for. Neither do you. The whole thing is a complete red herring. – user207421 Nov 03 '12 at 22:49
  • You're right about that there is no need for sleep(5) on the server, but I deliberately put it there. I wanted that my clients try to connect to the server before accept call in the server. This could also happen even without sleep(5) on the server, and also, this is how I make error conditions. The problem is that in client_body, connect returned 0, although I didn't see 3-way handshake in tcpdump capture: – user1267132 Nov 04 '12 at 11:02
  • For example, I see only this in tcpdump capture: tcpdump -r log1.cap -ln port 38640 11:45:17.531036 IP 127.0.0.1.38640 > 127.0.0.1.1519: Flags [S], seq 3043005227, win 32792, options [mss 16396,sackOK,TS val 1331905 ecr 0,nop,wscale 6], length 0 11:45:26.553459 IP 127.0.0.1.38640 > 127.0.0.1.1519: Flags [P.], seq 3043005228:3043005232, ack 14362430, win 513, options [nop,nop,TS val 1334160 ecr 1334102], length 4 – user1267132 Nov 04 '12 at 11:11
  • Please put TCP dump in the original question and delete comment here, its not very readable in comment. One aspect is that you have 1 thread doing accept() and 200 doing connect() with a listen backlog of 10 on a single host. They are not going to all get reliability connected if you flood connect() so it is expect to see refused connections. There is also Linux TCP syn flood angle to look at, although might not be active in the localhost path. Something else to research to explain the unexpected connect()=0 but recv()=-1 behaviour. – Darryl Miles Nov 04 '12 at 20:29
  • Changing listen() backlog value solved the problem of ECONNRESET errors. Didn't find out anything about connect()=0 / recv()=-1. That's enough for me for now. Thanks for your help. – user1267132 Nov 06 '12 at 14:28
1

I've seen this before. When connecting to localhost, connect() can appear to succeed even though it didn't really, and you don't find out until the first I/O operation. However because you are ignoring the error in the first I/O operation, the send(), you are getting it on the second one, the recv(), where you do check. Check the send() too.

Last time I saw this was twenty years ago and I forget how I got round it. You could try looking at SO_ERROR via getsockopt() after the connect().

user207421
  • 305,947
  • 44
  • 307
  • 483