4

I'm currently testing my networking code. This involves making a connection via the IPv4 loopback address (127.0.0.1). Unfortunately the program often (not always) gives an EPIPE error on sending data.

I am using Berkeley network sockets and libevent. I make a non-blocking socket via:

CBSocketReturn CBNewSocket(uint64_t * socketID,bool IPv6){
    *socketID = socket(IPv6 ? PF_INET6 : PF_INET, SOCK_STREAM, 0);
    if (*socketID == -1) {
        if (errno == EAFNOSUPPORT || errno == EPROTONOSUPPORT) {
            return CB_SOCKET_NO_SUPPORT;
        }
        return CB_SOCKET_BAD;
    }
    // Stop SIGPIPE annoying us.
    if (CB_NOSIGPIPE) {
        int i = 1;
        setsockopt(*socketID, SOL_SOCKET, SO_NOSIGPIPE, &i, sizeof(i));
    }
    // Make socket non-blocking
    evutil_make_socket_nonblocking((evutil_socket_t)*socketID);
    return CB_SOCKET_OK;
}

I make a connection event via:

bool CBSocketDidConnectEvent(uint64_t * eventID,uint64_t loopID,uint64_t socketID,void (*onDidConnect)(void *,void *),void * node){
    CBEvent * event = malloc(sizeof(*event));
    event->loop = (CBEventLoop *)loopID;
    event->onEvent.ptr = onDidConnect;
    event->node = node;
    event->event = event_new(((CBEventLoop *)loopID)->base, (evutil_socket_t)socketID, EV_TIMEOUT|EV_WRITE, CBDidConnect, event);
    if (NOT event->event) {
        free(event);
        event = 0;
    }
    *eventID = (uint64_t)event;
    return event;
}
void CBDidConnect(evutil_socket_t socketID,short eventNum,void * arg){
    CBEvent * event = arg;
    if (eventNum & EV_TIMEOUT) {
        // Timeout for the connection
        event->loop->onTimeOut(event->loop->communicator,event->node,CB_TIMEOUT_CONNECT);
    }else{
        // Connection successful
        event->onEvent.ptr(event->loop->communicator,event->node);
    }
}

And add it via:

bool CBSocketAddEvent(uint64_t eventID,uint16_t timeout){
    CBEvent * event = (CBEvent *)eventID;
    int res;
    if (timeout) {
        struct timeval time = {timeout,0};
        res = event_add(event->event, &time);
    }else
        res = event_add(event->event, NULL);
    return NOT res;
}

To connect:

bool CBSocketConnect(uint64_t socketID,uint8_t * IP,bool IPv6,uint16_t port){
    // Create sockaddr_in6 information for a IPv6 address
    int res;
    if (IPv6) {
        struct sockaddr_in6 address;
        memset(&address, 0, sizeof(address)); // Clear structure.
        address.sin6_family = AF_INET6;
        memcpy(&address.sin6_addr, IP, 16); // Move IP address into place.
        address.sin6_port = htons(port); // Port number to network order
        res = connect((evutil_socket_t)socketID, (struct sockaddr *)&address, sizeof(address));
    }else{
        struct sockaddr_in address;
        memset(&address, 0, sizeof(address)); // Clear structure.
        address.sin_family = AF_INET;
        memcpy(&address.sin_addr, IP + 12, 4); // Move IP address into place. Last 4 bytes for IPv4.
        address.sin_port = htons(port); // Port number to network order
        res = connect((evutil_socket_t)socketID, (struct sockaddr *)&address, sizeof(address));
    }
    if (NOT res || errno == EINPROGRESS)
        return true;
    return false;
}

Upon connection the canSend event is made:

bool CBSocketCanSendEvent(uint64_t * eventID,uint64_t loopID,uint64_t socketID,void (*onCanSend)(void *,void *),void * node){
    CBEvent * event = malloc(sizeof(*event));
    event->loop = (CBEventLoop *)loopID;
    event->onEvent.ptr = onCanSend;
    event->node = node;
    event->event = event_new(((CBEventLoop *)loopID)->base, (evutil_socket_t)socketID, EV_TIMEOUT|EV_WRITE|EV_PERSIST, CBCanSend, event);
    if (NOT event->event) {
        free(event);
        event = 0;
    }
    *eventID = (uint64_t)event;
    return event;
}
void CBCanSend(evutil_socket_t socketID,short eventNum,void * arg){
    CBEvent * event = arg;
    if (eventNum & EV_TIMEOUT) {
        // Timeout when waiting to write.
        event->loop->onTimeOut(event->loop->communicator,event->node,CB_TIMEOUT_SEND);
    }else{
        // Can send
        event->onEvent.ptr(event->loop->communicator,event->node);
    }
}

But sending often gives an EPIPE error:

int32_t CBSocketSend(uint64_t socketID,uint8_t * data,uint32_t len){
    ssize_t res = send((evutil_socket_t)socketID, data, len, CB_SEND_FLAGS);
    printf("SENT (%li): ",res);
    for (uint32_t x = 0; x < res; x++) {
        printf("%c",data[x]);
    }
    printf("\n");
    if (res >= 0)
        return (int32_t)res;
    if (errno == EAGAIN)
        return 0; // False event. Wait again.
    return CB_SOCKET_FAILURE; // Failure
}

It lands on return CB_SOCKET_FAILURE; and errno is set to EPIPE. Now why would this be? The send flags is just MSG_NOSIGNAL if it is set because SIGPIPE kept interrupting the program with this error. I want EPIPE to cause CBSocketSend to return CB_SOCKET_FAILURE and not interrupt the program, but there is not reason for the send to fail with EPIPE, so why is it doing it?

Last time I got the error I noticed the thread that connects was still on the connect() call. Is there danger in making a connection event to be handled by a separate thread than the thread that connects?

See the network code in these places:

https://github.com/MatthewLM/cbitcoin/blob/master/test/testCBNetworkCommunicator.c https://github.com/MatthewLM/cbitcoin/tree/master/src/structures/CBObject/CBNetworkCommunicator https://github.com/MatthewLM/cbitcoin/tree/master/dependencies/sockets

Thank you.

Edit: I ran it again and I got the error after connect() had finished.

EDIT 2: It seems the connection event is being given without an accept from the other side.

Matthew Mitchell
  • 5,293
  • 14
  • 70
  • 122
  • You say "Last time I got the error I noticed the thread that connects was still on the connect() call" - if that is really the case, that might be the source of the problem... It really doesn't make sense to call `send()` before `connect()` has completed. EPIPE is supposed to indicate that the local side of the socket has shut down (or in this case, maybe it isn't open yet). – twalberg Aug 16 '12 at 15:41
  • Why would the connection event be fired before connect() returns? If I receive a connection event by libevent, then surely the connection is fine and I can start sending/receiving, even if connect() has not returned? That was my assumption anyway. – Matthew Mitchell Aug 16 '12 at 16:07
  • Oddly sometimes the program also still receives SIGPIPE, even though I use `setsockopt(*socketID, SOL_SOCKET, SO_NOSIGPIPE, &i, sizeof(i));`. Odd. – Matthew Mitchell Aug 16 '12 at 16:14
  • I got rid of SIGPIPE. I was using macros before I included the headers. I fixed that. Still have EPIPE of-course. – Matthew Mitchell Aug 16 '12 at 16:21
  • As twalberg stated, `EPIPE` means you are sending data to a connection that is in a state where it cannot accept any writes. In `CBDidConnect`, you only check for timeout. Are you sure the callback cannot be called for another reason other than timeout or success? – jxh Aug 16 '12 at 16:25
  • @user315052: Good point, I'll recheck the libevent documentation. I might have missed something. The connection should not fail anyway. I'm only connecting to the loopback address. – Matthew Mitchell Aug 16 '12 at 16:36
  • Seems like I'm doing it correctly. Only timeout or write events should be given to CBDidConnect. – Matthew Mitchell Aug 16 '12 at 16:39
  • I can confirm that CBDidConnect receives the EV_WRITE event. – Matthew Mitchell Aug 16 '12 at 16:42
  • Also trying to connect on the network thread does nothing to stop it... – Matthew Mitchell Aug 17 '12 at 14:41
  • I am not a TCP/IP expert, but I did notice that [this documentation](http://www.kernel.org/doc/man-pages/online/pages/man2/send.2.html) says `EPIPE` can still be returned even when `MSG_NOSIGNAL` is set with a 'stream oriented socket'. It looks like you are creating your socket with `SOCK_STREAM`. Are you sure that the other end isn't breaking the connection? – rkyser Aug 20 '12 at 20:20
  • I know EPIPE can be returned with MSG_NOSIGNAL. MSG_NOSIGNAL and SO_NOSIGPIPE is just used to prevent a SIGPIPE. The other end is not breaking the connection but I will check again... – Matthew Mitchell Aug 20 '12 at 20:57
  • I looked again. ON closer inspection it seems the connection event is given when the other side has not yet accepted... – Matthew Mitchell Aug 20 '12 at 23:40
  • 2
    In `CBSocketConnect()` it looks like if you get `EINPROGRESS` you just return `true`-- which you also return if it connects successfully. You would have no way of knowing if you needed to wait for the connection to finish. According to [this](http://www.kernel.org/doc/man-pages/online/pages/man2/connect.2.html) you could `select()` or `poll()` for connection completion. – rkyser Aug 21 '12 at 13:23
  • @ rkyser: After CBSocketConnect() the code does nothing else until the connection event is received. I'm not using select or poll, I'm using libevent. – Matthew Mitchell Aug 21 '12 at 17:34
  • Shouldn't `CBSocketConnect()` only return `true` if `res` isn't `0` **and** `errno` is equal to `EINPROGRESS`? Currently it returns `true` for any value of ´errno´ in case `connect()` failed. – alk Aug 22 '12 at 14:09
  • @rkyser: user315052 has given you opportunity to claim the bounty if you wish. I don't mind but don't count on me being able to get back in three days. – Matthew Mitchell Aug 22 '12 at 17:13
  • @MatthewMitchell I reposted my comment as an answer as requested. – rkyser Aug 22 '12 at 19:11

3 Answers3

2

I am not a TCP/IP expert, but I did notice that this documentation says EPIPE can still be returned even when MSG_NOSIGNAL is set with a 'stream oriented socket'. It looks like you are creating your socket with SOCK_STREAM. The other end may be breaking the connection.

In CBSocketConnect() it looks like if you get EINPROGRESS you just return true-- which you also return if it connects successfully. You would have no way of knowing if you needed to wait for the connection to finish. According to this you could select() or poll() for connection completion.


The above is a repost of my comments from the OP at the request of @MatthewMitchell and @user315052.


Edit: I am adding a more detailed description of this answer, and some of the discussion that followed.

So, first try to do the connect(). Then, if EINPROGRESS is the error result, register for a write event wakeup from libevent. After getting into the callback function for EV_WRITE, check to see the status of the connection with getsockopt() with socket option SO_ERROR at level SOL_SOCKET. If the option value returned is 0, the connection has succeeded. Otherwise, treat it as an errno number.

After following this advice as illustrated in this answer, you discovered that the client encountered the error ECONNREFUSED. This explains why your writes were failing with EPIPE. After investigating your server, you found that the server was not able to listen to the bound address because of the error EADDRINUSE. This can be dealt with by setting the SO_REUSEADDR option on the listening socket.

Community
  • 1
  • 1
rkyser
  • 3,241
  • 1
  • 19
  • 28
1

Below is a simple libevent toy program that synthesizes EINPROGRESS, and then waits for the connection to complete by waiting for EV_WRITE. Basically, this program shows that in your application, you should attempt to do the connect call first, and if it fails with EINPROGRESS, you should wait for completion before performing I/O.

This is the libevent callback function:

extern "C" void on_connect (int sock, short ev, void *arg) {
    assert(ev == EV_WRITE);
    std::cout << "got wrieable on: " << sock << '\n';
    int optval = -1;
    socklen_t optlen = sizeof(optval);
    getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen);
    assert(optval == 0);
    std::cout << "succesful asynchronous connect on: " << sock << '\n';
    event_loopbreak();
}

These are some helper functions used by the toy application:

static void init_addr (struct sockaddr_in *addr, short port) {
    memset(addr, '\0', sizeof(*addr));
    addr->sin_family = AF_INET;
    addr->sin_port = htons(port);
    addr->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
}

static void setup_accept (int sock) {
    const int one = 1;
    struct sockaddr_in addr;
    init_addr(&addr, 9876);
    setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
    bind(sock, (struct sockaddr *)&addr, sizeof(addr));
    listen(sock, 1);
}

static int complete_accept (int sock) {
    struct sockaddr_in addr;
    socklen_t addrlen = sizeof(addr);
    return accept(sock, (struct sockaddr *)&addr, &addrlen);
}

static int try_connect (int sock) {
    struct sockaddr_in addr;
    init_addr(&addr, 9876);
    return connect(sock, (struct sockaddr *)&addr, sizeof(addr));
}

And the main program is below:

int main () {
    int accept_sock = socket(PF_INET, SOCK_STREAM, 0);
    setup_accept(accept_sock);
    int sock = socket(PF_INET, SOCK_STREAM, 0);
    fcntl(sock, F_SETFL, fcntl(sock, F_GETFL) | O_NONBLOCK);
    std::cout << "trying first connect on: " << sock << '\n';
    int r = try_connect(sock);
    assert(r < 0 && errno == EINPROGRESS);
    event_init();
    struct event ev_connect;
    event_set(&ev_connect, sock, EV_WRITE, on_connect, 0);
    event_add(&ev_connect, 0);
    int new_sock = complete_accept(accept_sock);
    event_dispatch();
    return 0;
}
jxh
  • 69,070
  • 8
  • 110
  • 193
  • Thank you for this. I've added `getsockopt(sock, SOL_SOCKET, SO_ERROR, &optval, &optlen);` and I find out that when the connection fails I get ECONNREFUSED. Now my code handles connection errors more gracefully but I still shouldn't have an ECONNREFUSED... – Matthew Mitchell Aug 22 '12 at 13:30
  • @MatthewMitchell: `ECONNREFUSED` likely means the server is not able to accept the connections quickly enough. You can try to increase the value you pass to `listen` in the server to allow more connections to enter the listen queue in an attempt to allow your server to buffer more incoming connection requests during a heavy burst. But, you probably also need to modify your server to just call `accept` in a loop, and place all `fd`s into a container until `accept` returns `EAGAIN`, and then process the container of accepted connections. – jxh Aug 22 '12 at 14:23
  • I found it! Listening fails with EADDRINUSE. I wasn't looking at the listen() failure. I just assumed it would work. Stupid me. – Matthew Mitchell Aug 22 '12 at 15:01
  • I know about this EADDRINUSE problem but I read it only happens on Windows. The internet is a bad source of information sometimes. I'll see how to get rid of it on OSX. – Matthew Mitchell Aug 22 '12 at 15:02
  • Since this answer led me to the source of the problem and showed how to use libevent properly with connect(), I'll accept it and give the bounty. – Matthew Mitchell Aug 22 '12 at 15:03
  • @MatthewMitchell: The `setup_accept` routine in my toy program shows to use the `SO_REUSEADDR` option. – jxh Aug 22 '12 at 15:08
  • Skipped past that originally but yes, I've added it in. It should hopefully work all the time now. – Matthew Mitchell Aug 22 '12 at 15:15
  • @MatthewMitchell: Thanks for accepting my answer, but I think you should give the bounty to rkyser, since it was his comment that led me to give you this example. Ask him to submit his comment as an answer. – jxh Aug 22 '12 at 16:12
0

From the moment when your process has woken up to handle the connection success, and up until the moment it tries to write to the socket, the connection's state can still change at the operating system's kernel point of of view, and libevent cannot have foresight about it.

The scenario you are describing is can be comprised of the following stages, given that the server you are connecting to behaves in a way I am about to describe. Given Process A (your client) and Process B (the other side of the connection):

  1. B runs, binds a server socket, waits.
  2. A runs, connect(), waits
  3. B wakes, does accept()
  4. A wakes up to handle the connection's success.
  5. B closes the socket (either due to process termination or explicit close()).
  6. A tries to send, gets errno == EPIPE.

This can be reproduced on a loopback.

BTW, SO_NOSIGPIPE is not portable socket option. If you are writing a portable C library, it is better to ignore the signal using signal() with SIG_IGN.

Dan Aloni
  • 3,968
  • 22
  • 30
  • THanks for the answer, though it doesn't really satisfy my concern. You say this is reproducible over a loopback address. Why is that? If the loopback connection doesn't work then the only reason would be a problem with my code, a problem with libevent or a problem with OSX. The communication is done between two threads on the same process and neither of the threads is disconnecting from my code. There is just an EPIPE when one side tries to send to the other. – Matthew Mitchell Aug 20 '12 at 20:05
  • And SIG_IGN did not work for me when running the program in Xcode. So either Xcode does not handle it properly or OSX doesn't. I've read around and people suggest using SO_NOSIGPIPE and MSG_NOSIGNAL instead of SIG_IGN because using the signal function makes the behaviour universal applied to a process which is not desirable for a library since someone may want to implement their own SIGPIPE signal handler whilst using my library.\ – Matthew Mitchell Aug 20 '12 at 20:10
  • It is reproducible on a loopback merely because of how scheduling is done for your threads. Anyway, the other side must have closed its client socket after accepting it. About SIGPIPE - in practice, almost everyone ignores it :) – Dan Aloni Aug 20 '12 at 21:53
  • The accepting thread doesn't close the connection. I've looked and it doesn't. – Matthew Mitchell Aug 21 '12 at 17:37