2

I have a simple client-server example using TCP socket on Linux. The server listens on the loopback address. The client connects to server and sends some integer plus an "END" string to mark the end of data. Server reads the numbers, add them all and returns the total sum. However, my server sometime blocks on read() even though the client has sent all the data successfully.

Here's the code:

server.c:

#include <stdio.h>
#include <errno.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#define BACKLOG 5

int main(int argc, char *argv[]) {
    struct sockaddr_in addr;
    int down_flag = 0;
    int result = 0;
    int ret = 0;

    int sfd = socket(AF_INET, SOCK_STREAM, 0);
    if (sfd < 0) {
        perror("Create server socket error: %s\n");
        return 0;
    }

    /* Bind socket to loopback address */
    memset((void *) &addr, 0, sizeof(struct sockaddr_in));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8888);
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_in)) == -1) {
        perror("Bind server socket failed");
        goto _exit;
    }

    if (listen(sfd, BACKLOG) == -1) {
        perror("Listen failed");
        goto _exit;
    }

    ssize_t num_rd = 0;
    char buf[100] = {0};
    for (;;)
    {
        printf("Waiting to accept a connection...\n");
        int cfd = accept(sfd, NULL, NULL);
        printf("Accepted socket fd = %d\n", cfd);
        result = 0;
        while ((num_rd = read(cfd, buf, sizeof(buf))) > 0) {
            /* Ensure the buffer is 0-terminated */
            buf[sizeof(buf) - 1] = 0;
            printf("Read data: %s\n", buf);

            /* Handle commands */
            if (!strncmp(buf, "DOWN", sizeof(buf))) {
                down_flag = 1;
                break;
            }
            if (!strncmp(buf, "END", sizeof(buf))) {
                break;
            }
            /* Add received summand */
            result += atoi(buf);
        }
        if (-1 == num_rd) {
            perror("Read error");
        }

        /* Send result */
        sprintf(buf, "%d", result);
        ret = write(cfd, buf, sizeof(buf));
        if (-1 == ret) {
            perror("Write error\n");
            goto _exit;
        }
        close(cfd);
        /* Quit on DOWN command */
        if (down_flag) {
            break;
        }
    }
_exit:
    close(sfd);
    return 0;
}

client.c:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>
#include <netinet/in.h>
#include <arpa/inet.h>

int main(int argc, char *argv[]) {
    struct sockaddr_in addr;
    int ret;
    int data_socket;
    char buf[100] = {0};
    int i = 0;

    data_socket = socket(AF_INET, SOCK_STREAM, 0);
    if (-1 == data_socket) {
        perror("Create client socket error");
        exit(EXIT_FAILURE);
    }

    /* Connect to server socket */
    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_port = htons(8888);
    addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
    ret = connect(data_socket, (const struct sockaddr *) &addr, sizeof(addr));
    if (-1 == ret) {
        perror("Connect error");
        exit(EXIT_FAILURE);
    }

    /* Send arguments */
    for (i = 1; i < argc; i++) {
        ret = write(data_socket, argv[i], strlen(argv[i]) + 1);
        if (-1 == ret) {
            perror("Write error");
            break;
        }
    }
    strcpy(buf, "END");
    ret = write(data_socket, buf, strlen(buf) + 1);
    printf("write %s to socket, ret = %d\n", buf, ret);
    if (-1 == ret) {
        perror("Write to socket error");
        exit(EXIT_FAILURE);
    }
    /* Read the result */
    memset(buf, 0, sizeof(buf));
    ret = read(data_socket, buf, sizeof(buf));
    if (-1 == ret) {
        perror("Read from client socket error");
        exit(EXIT_FAILURE);
    }
    buf[sizeof(buf) - 1] = 0;
    printf("Result = %s\n", buf);
    close(data_socket);
    exit(EXIT_SUCCESS);
}

Run the client a few times and the server will be blocking on the read() call at some points:

$ for i in {1..100}; do ./client 3 4 5 6; done
write END to socket, ret = 4
Result = 18
write END to socket, ret = 4

Server output:

$ ./server
Waiting to accept a connection...
Accepted socket fd = 4
Read data: 3
Read data: 4
Read data: 5
Read data: 6
Read data: END
Waiting to accept a connection...
Accepted socket fd = 4
Read data: 3

The server is blocking on while ((num_rd = read(cfd, buf, sizeof(buf))) > 0) line.

Edit: My question is why the read() blocks. AFAIK, read() will block until there's at least 1 byte of data to be read from the socket. In this case, the client has sent more data than the server has read, so I think there's available data to be read from the socket. So why does read() still block?

theman
  • 598
  • 1
  • 7
  • 19
  • Log the total number of bytes sent and the total number of bytes received. You'll find that they are equal. So `read` should block since all the sent data has been received. – David Schwartz Nov 07 '20 at 21:54
  • Why not use send and recv calls instead of read and write. – kjohri Nov 08 '20 at 03:08

3 Answers3

3

The core of the issue is that the code tests for the first message in a buffer and ignores the possibility that the same buffer might contain multiple messages, partial messages or any other combination (see edit). For this reason, the message END is sometimes overlooked and the read loop never terminated.

The code assumes that a single read will receive exactly what a single write call had sent.

This is highly inaccurate, rarely true and might work only sometimes, when both client and server are on the same machine.

A single read might read 2 write calls simultaneously, or it might read half a write call and then another 1.5 write calls later on...

TCP/IP (unlike UDP) is a streaming protocol and has no knowledge of message boundaries.


EDIT:

To clarify (as requested in the comment), imagine a call to read collects the following data "1234\0EN" (the next read will collect "D\0")... what does the program do?

Another possible situation is that all writes are read in one go. i.e, buf contains the string "3\04\05\06\0END\0".

What happens within the loop at this point?

In this example scenario, the if statement (strncmp(buf, "END", sizeof(buf)) is always false (and unsafe), resulting in a situation where the server never breaks from the while(read) loop.

Since the while loop continues, the server will attempt another read when there's no data available, resulting in the server blocking until the client will send additional data.

Myst
  • 18,516
  • 2
  • 45
  • 67
  • Thanks. However this still doesn't help me to understand why the `read()` blocks in this case, or to say it your way, this `read()` reads 0 `write()`. – theman Nov 07 '20 at 16:24
  • @theman ... hmmm, I'll edit my answer with more details, but consider a situation where `read` gets `"1234\0END\0"`. When does the `while` loop break? – Myst Nov 07 '20 at 16:33
3

The null terminator needs to be immediately after the data that was read, not just at the end of the buffer. Otherwise when you try to treat the buffer as a string, the previous contents will be part of the string.

buf[num_read] = '\0';

And to prevent this from writing outside the buffer, you need to subtract 1 from the buffer size when calling read():

while ((num_rd = read(cfd, buf, sizeof(buf)-1)) > 0) {

The server then blocks because it doesn't recognize the END message that makes it break out of the reading loop, and there's nothing more for it to read. It will block until there's either data to read or EOF. It will get EOF when the client closes the connection, but the client doesn't do that until the server sends the result.

However, I think your whole design may be doomed. TCP is a stream protocol, it doesn't have message boundaries. Each call to write() in the client doesn't necessarily result in a single read() in the server, consecutive writes can (and usually will) be concatenated. You'll need to redesign your protocol to handle this.

Barmar
  • 741,623
  • 53
  • 500
  • 612
1

most straight-forward is to use a poll().

upd:
i think real cause of the stuff written in other answer. you wanna pipe where you have a stream.
But
you can try this

SOCKET q = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
bool c = 1;
setsockopt(q, IPPROTO_TCP, TCP_NODELAY, (char *)&c, sizeof(c));

Finally

#include <stdio.h>
#include <stddef.h>
#include <sys/socket.h>    //socket
#include <netdb.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <netinet/tcp.h>
#include <string.h>
#include <poll.h>

int main(int argc , char *argv[])
{
    struct sockaddr_in addr = { AF_INET , htons( 8888 ) /* btc port */ , htonl(INADDR_LOOPBACK) };;
    int down_flag = 0;
    int result = 0;
    int ret = 0;
    bool c = !false;
    enum { buf_size = 1025 };
    char buf[buf_size] = {0};
    int bc = 1024;    

    int data_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    setsockopt(data_socket, SOL_SOCKET, SO_KEEPALIVE, (char *)&c, sizeof(c));
    setsockopt(data_socket, IPPROTO_TCP, TCP_NODELAY, (char *)&c, sizeof(c));
    setsockopt(data_socket, SOL_SOCKET, SO_REUSEADDR, (char *)&c, sizeof(c));
    setsockopt(data_socket, SOL_SOCKET, SO_REUSEPORT, (char *)&c, sizeof(c));
    setsockopt(data_socket, SOL_SOCKET, SO_SNDBUF, (int *)&bc, sizeof(bc));
    setsockopt(data_socket, SOL_SOCKET, SO_RCVBUF, (int *)&bc, sizeof(bc));
    ret = connect(data_socket, (const struct sockaddr *) &addr, sizeof(addr));
    if (-1 == ret) {
        perror("Connect error");
        exit(EXIT_FAILURE);
    }
    setsockopt(data_socket, IPPROTO_TCP, TCP_NODELAY, (char *)&c, sizeof(c));    /* Send arguments */

    struct pollfd pfd[1];
    int nready;
    pfd[0].fd = data_socket;
    pfd[0].events = POLLOUT;

    for (int k = 1; k < argc; k++)
    {
        nready = poll(pfd, 1, 15 * 1000);
        // if((pfd[0].revents & (POLLOUT|POLLHUP))) printf("tray \n" );
        memset(buf, 0, sizeof(buf));
        strcpy(buf, argv[k]);
        ret = write(data_socket, buf , 99);
        if (-1 == ret) {
            perror("Write error");
            break;
        }
    }

    memset(buf, 0, sizeof(buf));
    strcpy(buf, "END");
    nready = poll(pfd, 1, 15 * 1000);
    ret = write(data_socket, buf, 99);
    printf("write %s to socket, ret = %d\n", buf, ret);
    if (-1 == ret) {
        perror("Write to socket error");
        exit(EXIT_FAILURE);
    }
    /* Read the result */
    memset(buf, 0, sizeof(buf));
    pfd[0].events = POLLIN;
    nready = poll(pfd, 1, 15 * 1000);
    ret = read(data_socket, buf, 99);
    if (-1 == ret) {
        perror("Read from client socket error");
        exit(EXIT_FAILURE);
    }
    buf[sizeof(buf) - 1] = '\0';
    printf("Result = %10s\n", buf);
    close(data_socket);
    exit(EXIT_SUCCESS);
}

And

#include <stdio.h>
#include <stddef.h>
#include <sys/socket.h>    //socket
#include <netdb.h>
#include <unistd.h>
#include <stdbool.h>
#include <stdlib.h>
#include <netinet/tcp.h>
#include <string.h>
#include <poll.h>


int main(int argc , char *argv[])
{
    struct sockaddr_in addr = { AF_INET , htons( 8888 ) /* btc port */ , htonl(INADDR_LOOPBACK) };;
    int down_flag = 0;
    int result = 0;
    int ret = 0;
    bool c = !false;
    int bc = 1024;    

    int sfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (sfd < 0) {
        perror("Create server socket error: %s\n");
        return 0;
    }
    setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (char *)&c, sizeof(c));
    setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (char *)&c, sizeof(c));
    setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (char *)&c, sizeof(c));
    setsockopt(sfd, SOL_SOCKET, SO_REUSEPORT, (char *)&c, sizeof(c));
    setsockopt(sfd, SOL_SOCKET, SO_SNDBUF, (int *)&bc, sizeof(bc));
    setsockopt(sfd, SOL_SOCKET, SO_RCVBUF, (int *)&bc, sizeof(bc));
    /* Bind socket to loopback address */
    if (bind(sfd, (struct sockaddr *) &addr, sizeof(struct sockaddr_in)) == -1) {
        perror("Bind server socket failed");
        goto _exit;
    }
    setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (char *)&c, sizeof(c));

    if (listen(sfd, 128) == -1) {
        perror("Listen failed");
        goto _exit;
    }

    ssize_t num_rd = 0;
    enum { buf_size = 1025 };
    char buf[buf_size] = {0};

    struct pollfd pfd[1] = {{0}};
    int nready;

    for (;;)
    {
        printf("Waiting to accept a connection...\n");
        int cfd = accept(sfd, NULL, NULL);
        printf("Accepted socket fd = %d\n", cfd);
        result = 0;
        pfd[0].fd = cfd;
//      pfd[0].fd = sfd;
        pfd[0].events = POLLIN;
        while (!false) {
            memset(buf, 0, sizeof(buf));
            nready = poll(pfd, 1, 15 * 1000);
            num_rd = read(cfd, buf, 99);
            if (num_rd <= 0) break;
            buf[sizeof(buf) - 1] = '\0';
            printf("Read data: %s\n", buf);

            /* Handle commands */
            if (!strncmp(buf, "DOWN", strlen(buf))) {
                down_flag = 1;
                break;
            }
            if (!strncmp(buf, "END", strlen(buf))) {
                break;
            }
            int temp = 0;
            int f = sscanf(buf, "%d", &temp);
            if (f != 1)
            {
                printf("and then \n" );
                return (0);
            }
            result = result + temp;
        }
        if (-1 == num_rd) {
            perror("Read error");
        }

        memset(buf, 0, sizeof(buf));
        sprintf(buf, "%d", result);
        pfd[0].events = POLLOUT;
        nready = poll(pfd, 1, 15 * 1000);
        ret = write(cfd, buf, 99);
        if (-1 == ret) {
            perror("Write error\n");
            goto _exit;
        }
        close(cfd);
        /* Quit on DOWN command */
        if (down_flag) {
            break;
        }
    }
_exit:
    close(sfd);
    return 0;
}

also one size for all stuff there. After one write read_buffer should be full so others wait until read is finished up

run:
./serv
for i in {1..100}; do ./client 3 4 5 6; done

also on github:
https://github.com/alexeyneu/BlockZero/tree/master/onemore

edit:
with 1kb/command looks like it will hold against some big transfers. It's minimum for SO_XXX buffers

  • How does this solve the underlying problem...? Can you identify the underlying problem? If not, why answer? – Myst Nov 07 '20 at 16:41
  • @Myst looks like you just don't know what `poll()` does . pumping bulk of empty `read`'s with a `while()` is bad stuff for sure. it's not even your question – Алексей Неудачин Nov 07 '20 at 18:35
  • I am the author of the [facil.io framework](http://facil.io), which is a C framework for web and network applications. I know what `poll` does. However, `poll` won't solve the problem, because the issue with the code in question isn't related to polling or to non-blocking IO. The issue with the code is that the design tests for the first message in a buffer and ignores the possibility that the same buffer might contain multiple messages. – Myst Nov 07 '20 at 20:23
  • P.S., I didn't downvote the answer because I think you're only trying to help, which is good. However, I think this doesn't answer the question. It is more of a "here's another approach that could help you". Also, looping on `read` is a common (and valid) approach when handling blocking IO. – Myst Nov 07 '20 at 20:25
  • Yeah. we see one of those now – Алексей Неудачин Nov 07 '20 at 23:30
  • @Myst haha why then you dont know this https://stackoverflow.com/questions/13479760/c-socket-recv-and-send-all-data#comment50470059_13479835 . if used with socket opt i've mentioned it should do the trick – Алексей Неудачин Nov 08 '20 at 06:12
  • Maybe my comments weren't clear. I know how [`poll` works](https://github.com/facil-io/cstl/blob/ff251c2d7ce521e40f95a96cf09998c8e7153c97/stl_slices/104%20sock.h#L296-L343), as well as [`epoll` and `setsockopt`](https://stackoverflow.com/questions/41582560/how-does-epolls-epollexclusive-mode-interact-with-level-triggering/41730177#41730177), as you can tell from my code examples. But in your solution, you are **moving the problem** from `write` to `poll` - you aren't **solving** the problem. The problem is in a different place. – Myst Nov 08 '20 at 10:46
  • @Myst thats may be. i prefer `poll()` to non-blockin write that blocks and all this. – Алексей Неудачин Nov 08 '20 at 19:10