This issue have bothered me for weeks and I could not find any solution on the web. So I have to create a new question to you gurus.
I was trying to read/write on massive number of sockets, please see test code below. It behave normally when the sockets number is below 1500. When the number of sockets is beyond 1500, the program will crash unexpectedly. I know that I should use command ulimit -n 32768
to increase the open files number limit. But the program still can not behave correctly.
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdint.h>
#include <netdb.h>
#include <errno.h>
#include <malloc.h>
#include <string.h>
int main(int argc, char* argv[])
{
if (argc!=2)
{
printf("usage: test <number of sockets>\n");
return -1;
}
int socketsNum=atoi(argv[1]);
if (socketsNum<=0)
{
printf("error: invalid sockets number\n");
return -1;
}
int *socketHandles=(int*)malloc(sizeof(int)*socketsNum);
if (socketHandles==NULL)
{
printf("error: failed to alloc socket handle memory\n");
return -1;
}
for (int i=0;i<socketsNum;i++)
{
socketHandles[i]=-1;
}
printf("creating %d sockets ...\n",socketsNum);
int createdSocketsNum=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socket(AF_INET,SOCK_DGRAM,IPPROTO_UDP);
if (socketHandle==-1)
{
int lastError=errno;
printf("warning: socket() failed: index: %d, error: %d\n",i+1,lastError);
continue;
}
sockaddr_in sockAddr; // 0.0.0.0:0
memset(&sockAddr,0,sizeof(sockAddr));
sockAddr.sin_family = AF_INET;
sockAddr.sin_addr.s_addr = htonl(INADDR_ANY);
sockAddr.sin_port = htons(0);
if (bind( socketHandle, (sockaddr*) &sockAddr, sizeof(sockAddr)) == -1)
{
int lastError=errno;
printf("warning: bind() failed: index: %d, error: %d\n",i+1,lastError);
close(socketHandle);
continue;
}
socketHandles[i]=socketHandle;
createdSocketsNum++;
}
printf("created %d sockets.\n",createdSocketsNum);
//test reading;
printf("testing reading ...\n");
int readableNumber=0;
int unreadableNumber=0;
int readingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
readingSkippedNumber++;
continue;
}
fd_set rset;
FD_ZERO(&rset);
FD_SET(socketHandle, &rset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, &rset, NULL, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unreadableNumber++;
}
else
{
readableNumber++;
}
}
printf("readable: %d, unreadable: %d, skipped: %d, total: %d\n",readableNumber,unreadableNumber,readingSkippedNumber,socketsNum);
//test writing
printf("testing writing ...\n");
int writableNumber=0;
int unwritableNumber=0;
int writingSkippedNumber=0;
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
writingSkippedNumber++;
continue;
}
fd_set wset;
FD_ZERO(&wset);
FD_SET(socketHandle, &wset);
struct timeval timeout = {0, 0};
int retCode=select(socketHandle + 1, NULL, &wset, NULL, &timeout);
if (retCode==-1)
{
int lastError=errno;
printf("warning: select() failed: index: %d, error: %d\n",i+1,lastError);
}
else if (retCode==0)
{
unwritableNumber++;
}
else
{
writableNumber++;
}
}
printf("writable: %d, unwritable: %d, skipped: %d, total: %d\n",writableNumber,unwritableNumber,writingSkippedNumber,socketsNum);
printf("closing ...\n");
for (int i=0;i<socketsNum;i++)
{
int socketHandle=socketHandles[i];
if (socketHandle==-1)
{
continue;
}
close(socketHandle);
}
free(socketHandles);
printf("completed!\n");
return 0;
}
Compile:
g++ TestSockets.cpp -ldl -g -ggdb -o TestSockets
Config:
ulimit -n 32768
Some typical results:
Good result of
./TestSockets 1500
:creating 1500 sockets ... created 1500 sockets. testing reading ... readable: 0, unreadable: 1500, skipped: 0, total: 1500 testing writing ... writable: 1372, unwritable: 128, skipped: 0, total: 1500 closing ... completed!
Bad result of
./TestSockets 1900
:creating 1900 sockets ... created 1900 sockets. testing reading ... warning: select() failed: index: 1797, error: 9 ...(more lines trimmed) warning: select() failed: index: 1820, error: 9 warning: select() failed: index: 1821, error: 22 readable: 0, unreadable: 1878, skipped: 0, total: 1900 testing writing ... warning: select() failed: index: 1641, error: 9 ...(more lines trimmed) warning: select() failed: index: 1660, error: 9 warning: select() failed: index: 1661, error: 22 writable: 1751, unwritable: 128, skipped: 0, total: 1900 closing ... completed!
Comment: because 1900>1751+128, it seems that the stack was damaged.
Bad result of
./TestSockets 2000
:creating 2000 sockets ... created 2000 sockets. testing reading ... Segmentation fault
More Investigation:
According to gdb information. It seems that the stack memory was damaged during running:
creating 2000 sockets ...
created 2000 sockets.
testing reading ...
Program received signal SIGSEGV, Segmentation fault.
0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78
78 int socketHandle=socketHandles[i];
(gdb) print socketHandles
$1 = (int *) 0x0
(gdb) info local
socketHandle = 0
rset = {fds_bits = {0 <repeats 32 times>}}
timeout = {tv_sec = 0, tv_usec = 0}
retCode = 0
i = 1601
socketsNum = 2000
unreadableNumber = 1601
unwritableNumber = 134514249
socketHandles = 0x0
createdSocketsNum = 2000
readableNumber = 0
readingSkippedNumber = 0
writableNumber = -136436764
writingSkippedNumber = 0
(gdb) info stack
#0 0x08048b79 in main (argc=2, argv=0xffffd3b4) at TestSockets.cpp:78