I am trying to debug a problem on Linux where my TCP server sometimes blocks on a socket forever, even though the socket is already gone. I wrote test code to understand the behavior of close(), shutdown(), select() and recv().
I have 2 threads - thread 1 blocks on a socket using select(), and thread 2 calls shutdown(SHUT_RDWR) on the same socket. When shutdown() is called from thread 2, thread 1 wakes up, select() returns 1 but errno reads 0. Also, after select() returns 1, recv() is called and it returns 0, again errno reads 0.
If I change the thread 2 implementation to use close() instead of shutdown(), select() never wakes up.
If I change the thread 2 implementation to call shutdown() followed by close(), select() returns 1 (errno remains 0) and then recv() returns -1 and errno reads EBADF.
My questions:
When close() is called, should select() not wake up? I looked up the close() man page, but could not ascertain what should happen.
When shutdown(SHUT_RDWR) is called, should select() return normally, as I am seeing? Or should it return -1 and set errno to EBADF? The select() man page says errno is set to EBADF if one or more of the file descriptor sets specified a file descriptor that is not a valid open file descriptor. So it looks like shutdown() only sends a TCP FIN to the remote side but does not actually close the socket fd?
If I call shutdown() followed by close(), it seems to help. How is this different from calling close() alone?
Note: I understand that blocking on the socket in one thread and closing the same socket from another thread is not a good design, but I am dealing with a legacy codebase, so I can't change this design. I can only tweak it to call shutdown() and then close() as it seems to work, but I want to make sure I get the fundamental understanding right.
Thanks in advance.
Here is the test code:
Thread 1:
#include "sys/socket.h"
#include "sys/select.h"
#include "net/if.h"
#include "linux/sockios.h"
#include "netinet/in.h"
#include "fcntl.h"
#include "signal.h"
#include "errno.h"
#include "strings.h"
#include "pthread.h"
#include "stdio.h"
#include "unistd.h"
/*
 * f1 - thread 1 of the experiment: accept one TCP client on port 7777, then
 * wait on the connected socket with select() and read from it with recv().
 *
 * The listening socket is stored in f1so1 and the accepted connection in
 * f1so2; both are declared outside this function. f2() closes or shuts down
 * f1so2 while this thread may be blocked in select().
 *
 * p: unused thread argument.
 *
 * Returns NULL in every case. The function returns as soon as a socket call
 * fails, or when recv() reports end-of-stream by returning 0.
 */
void* f1(void* p)
{
    int nready, bytes, ret, err;
    struct sockaddr_in f1so1_addr = {0};
    fd_set read_f1, error_fds;
    char f1buf[20];
    struct sockaddr clientInfo;
    socklen_t clientAddrLen = sizeof(clientInfo);

    (void)p;

    f1so1 = socket(AF_INET, SOCK_STREAM, 0);
    printf("f1: open fd's - f1fd1 = %d, f1so1 = %d\n", f1fd1, f1so1);
    if (f1so1 < 0)
    {
        printf("f1: socket failed, return...\n");
        return NULL;
    }

    f1so1_addr.sin_family = AF_INET;
    f1so1_addr.sin_addr.s_addr = htonl(INADDR_ANY);
    f1so1_addr.sin_port = htons(7777);

    /* errno is only meaningful after a failure, so clear it before each call
     * whose errno value is logged, and capture it before printf() can touch it. */
    errno = 0;
    ret = bind(f1so1, (const struct sockaddr*) &f1so1_addr, sizeof(f1so1_addr));
    err = errno;
    printf("f1: bind returned %x, errno = %x\n", ret, err);
    if (ret < 0)
    {
        printf("f1: bind failed, return...\n");
        return NULL;
    }

    if (listen(f1so1, 5) < 0)
    {
        printf("f1: listen failed, errno = %x\n", errno);
        return NULL;
    }
    printf("f1- listening on f1so1, blocking on accept\n");

    errno = 0;
    f1so2 = accept(f1so1, &clientInfo, &clientAddrLen);
    err = errno;
    printf("f1- accept returned %x, errno = %x\n", f1so2, err);
    /* Failure is signalled by the return value, not by a non-zero errno. */
    if (f1so2 < 0)
    {
        printf("f1: accept failed, return...\n");
        return NULL;
    }

    printf("f1: start loop\n");
    while (1)
    {
        printf("f1: call select _read and error__ - cBLOCKING\n");

        /* select() overwrites the sets with the ready descriptors, so they
         * have to be rebuilt before every call. */
        FD_ZERO(&read_f1);
        FD_SET(f1so2, &read_f1);
        FD_ZERO(&error_fds);
        FD_SET(f1so2, &error_fds);

        errno = 0;
        nready = select(f1so2 + 1, &read_f1, NULL, &error_fds, NULL);
        err = errno;
        printf("f1: select returned = %x, errno = %x\n", nready, err);
        if (nready < 0)
        {
            printf("f1: select failed...\n");
            if (err == EINTR)
            {
                continue;   /* interrupted by a signal: just wait again */
            }
            break;          /* e.g. EBADF: retrying would spin forever */
        }
        if (nready == 0)
        {
            continue;
        }

        if (FD_ISSET(f1so2, &error_fds))
        {
            printf("f1: error on socket %x\n", f1so2);
        }
        if (FD_ISSET(f1so2, &read_f1))
        {
            /* NOTE(review): this delay presumably exists to leave time for
             * f2() to act on the socket between select() and recv(). */
            sleep(1);

            /* Read one byte less than the buffer so it can be terminated
             * before being printed with %s. */
            errno = 0;
            bytes = (int)recv(f1so2, f1buf, sizeof(f1buf) - 1, 0);
            err = errno;
            printf("f1: errno after recv = %x\n", err);
            if (bytes < 0)
            {
                printf("!!!recv failed!!!\n");
                return NULL;
            }
            if (bytes == 0)
            {
                /* recv() returning 0 on a stream socket means end-of-stream;
                 * the socket stays readable, so looping again would never block. */
                printf("f1: connection closed on socket %x\n", f1so2);
                break;
            }
            f1buf[bytes] = '\0';
            printf("f1: read from socket %x, bytes = %x, data = %s\n", f1so2, bytes, f1buf);
        }
    }
    printf("f1: exiting\n");
    return NULL;
}
Thread 2:
#include "stdio.h"
#include "pthread.h"
#include "linux/socket.h"
#include "sys/socket.h"
#include "net/if.h"
#include "linux/sockios.h"
#include "netinet/in.h"
#include "fcntl.h"
#include "signal.h"
#include "errno.h"
#include "strings.h"
extern int f1so2;
/*
 * f2 - interactive driver for the experiment: repeatedly reads a menu choice
 * from stdin and applies it to the connected socket f1so2.
 *
 *   1: close(f1so2)
 *   2: shutdown(f1so2, SHUT_RDWR)
 *   3: leave the loop and return
 *
 * Any other number just shows the menu again. The function also returns when
 * stdin reaches end-of-file.
 */
void f2()
{
    int f2_ret, choice, nread, err;

    while (1)
    {
        printf("f2: 1: close socket, 2: shutdown socket, 3: exit f2\n");
        nread = scanf("%d", &choice);
        if (nread == EOF)
        {
            break;
        }
        if (nread != 1)
        {
            /* Non-numeric input stays in the stream after a failed %d
             * conversion; discard that token or scanf() would fail forever. */
            if (scanf("%*s") == EOF)
            {
                break;
            }
            continue;
        }

        if (choice == 1)
        {
            /* Clear errno first so the logged value belongs to this call. */
            errno = 0;
            f2_ret = close(f1so2);
            err = errno;
            printf("f2: close on socket %x returned %x, errno = %x\n", f1so2, f2_ret, err);
        }
        else if (choice == 2)
        {
            errno = 0;
            f2_ret = shutdown(f1so2, SHUT_RDWR);
            err = errno;
            printf("f2: shutdown on socket %x returned %x, errno = %x\n", f1so2, f2_ret, err);
        }
        else if (choice == 3)
        {
            break;
        }
    }
    printf("f2: exiting\n");
    return;
}