0

I have this server application that sets up a local communication stream socket for clients to connect to. If a second instance of the server is launched and it tries to bind to the same address (file name), bind() should fail with EADDRINUSE. But it does not. Why?

Here is pared-down code that showcases the problem:

#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <sys/un.h>

/*
 * Create a local-domain (PF_LOCAL) socket of the given style and, when a
 * non-empty filename is supplied, bind it to that filesystem path.
 *
 * filename: path to bind to; NULL or "" returns the socket unbound.
 * style:    socket type handed to socket(), e.g. SOCK_STREAM.
 *
 * Returns the socket descriptor. On any failure the error is reported with
 * perror() and the process exits with EXIT_FAILURE.
 */
int make_socket(const char *filename, int style) {
   struct sockaddr_un name;
   size_t path_len;
   int sock;

   sock = socket(PF_LOCAL, style, 0);
   if (sock < 0) {
      perror("socket");
      exit(EXIT_FAILURE);
   }

   if ((!filename) || (filename[0] == '\0')) {
      return sock;
   }

   /* Reject a path that does not fit (with its terminator) in sun_path:
    * truncating it would bind the socket to a different file name than
    * the one the caller asked for. */
   path_len = strlen(filename);
   if (path_len >= sizeof(name.sun_path)) {
      errno = ENAMETOOLONG;
      perror("bind");
      exit(EXIT_FAILURE);
   }

   /* Clear the whole structure so no indeterminate bytes are passed on. */
   memset(&name, 0, sizeof(name));
   name.sun_family = AF_LOCAL;
   memcpy(name.sun_path, filename, path_len + 1);

   /* Address length = header up to sun_path + path length (terminator not
    * counted), i.e. the same value the SUN_LEN macro would yield. */
   if (bind(sock, (struct sockaddr *) &name,
            (socklen_t) (offsetof(struct sockaddr_un, sun_path) + path_len)) < 0) {
      perror("bind");
      exit(EXIT_FAILURE);
   }

   return sock;
}

/* Convenience wrapper: calls make_socket() with the SOCK_STREAM style and
 * hands back whatever descriptor it returns. */
int make_stream_socket(const char *filename) {
   const int style = SOCK_STREAM;
   int sock = make_socket(filename, style);

   return sock;
}

/* Filesystem path the server binds its listening socket to. */
#define TS_SERVER  "/tmp/socket-server"

/*
 * Demo server: binds a stream socket to TS_SERVER, waits with select() for
 * a connection request, accepts the first one, logs the peer's path, closes
 * it and shuts down. All progress messages go to stderr.
 *
 * argc/argv are unused. Exits with EXIT_SUCCESS after a clean shutdown and
 * with EXIT_FAILURE (after perror()) when a socket call fails.
 */
int main(int argc, char *argv[]) {
   int sock_server, sock_client;
   struct sockaddr_un name_client;
   socklen_t size_name_client; /* accept() expects a socklen_t, not a size_t */
   int i;
   fd_set client_set, selected_set;
   int quitting = 0;

   (void) argc;
   (void) argv;

   fprintf(stderr, "Server: starting up\n");
   /* Removes whatever file currently sits at TS_SERVER so that bind() can
    * create it. Note that this also removes the file when another running
    * instance is still listening on it. */
   unlink(TS_SERVER);
   sock_server = make_stream_socket(TS_SERVER);
   fprintf(stderr, "Server: accepting connections on %d\n", sock_server);
   if (0 > listen(sock_server, 1)) {
      perror("listen(connection requests)");
      exit(EXIT_FAILURE);
   }
   FD_ZERO(&client_set);
   FD_SET(sock_server, &client_set);
   while (!quitting) {
      fprintf(stderr, "Server: waiting for connections\n");
      selected_set = client_set; //select() overwrites its set, so work on a copy
      if (0 > select(FD_SETSIZE, &selected_set, NULL, NULL, NULL)) {
         perror("select(for readability)");
         exit(EXIT_FAILURE);
      }

      fprintf(stderr, "Server: connection(s) received\n");
      for (i = 0; i < FD_SETSIZE; ++i) {
         if (!FD_ISSET(i, &selected_set)) {
            continue;
         }
         if (i != sock_server) {
            continue;
         }

         fprintf(stderr, "Server: accepting connection\n");
         //Start from an all-zero address: a client that never bound its
         // socket has no path, and sun_path must still be a valid
         // (empty) string when it is printed below.
         memset(&name_client, 0, sizeof(name_client));
         size_name_client = sizeof(name_client);
         sock_client = accept(sock_server,
                              (struct sockaddr *) &name_client,
                              &size_name_client);
         if (sock_client < 0) {
            perror("accept(connection request)");
            exit(EXIT_FAILURE);
         }
         //The length reported by accept() is deliberately not relied upon
         // (Ubuntu Launchpad bug 1463553: it can be off by one); just make
         // sure the path is terminated before treating it as a string.
         name_client.sun_path[sizeof(name_client.sun_path) - 1] = '\0';
         fprintf(stderr, "Server: accepted connection request from '%s' on %d\n",
                         name_client.sun_path, sock_client);
         quitting = 1;
         close(sock_client);
         break; //out of the for
      } //for
   } //while

   fprintf(stderr, "Server: shutting down\n");
   close(sock_server);
   unlink(TS_SERVER);

   exit(EXIT_SUCCESS);
}

After compilation, run a first instance from the command line, then run a second instance from another command line. The second instance should fail on bind(), but does not.

Urhixidur
  • 2,270
  • 2
  • 19
  • 24
  • 1
    We really need the actual code. There are certain socket options that enable such reuse, such as `SO_REUSEADDR` and `SO_REUSEPORT`. – user3629249 Jun 09 '15 at 21:10
  • Note: bind() connects a socket to a specific port, not to some file name – user3629249 Jun 09 '15 at 21:12
  • I used `getsockopt` to check `EADDRINUSE`, and it is set to its default 0 (meaning "no") as expected. As for binding to a *port*, that is true only for `PF_INET`, not for `PF_LOCAL` (a.k.a. `PF_UNIX`, `PF_FILE`) as is the case here. – Urhixidur Jun 09 '15 at 21:17
  • @user3629249 For Unix Domain Sockets `AF_UNIX` or `AF_LOCAL` a file name is used as the network address. – nategoose Jun 09 '15 at 21:18
  • I do have a pared-down version of the server code that can showcase the problem, but it's too long for a comment box. (In my previous comment, I meant `SO_REUSEADDR`, not `EADDRINUSE`) – Urhixidur Jun 09 '15 at 21:36
  • Perhaps you can post just the code around bind()? – Mike Andrews Jun 09 '15 at 21:44
  • Edit the question and put the code in there (not in the comments). – kaylum Jun 09 '15 at 22:52

1 Answer

2

Because:

  unlink(TS_SERVER);

You remove the existing socket file, which allows a new one to be created in its place.

davmac
  • 20,150
  • 1
  • 40
  • 68
  • The problem with dropping the `unlink` (which I would have expected to fail, the filename being locked by the first server instance) is that if the server dies unexpectedly and leaves the filename in place, the next time it starts up, it will fail to `bind` for the same reason. A related thread is 5339200 ([link](http://stackoverflow.com/questions/5339200/how-to-create-a-single-instance-application-in-c-or-c)) – Urhixidur Jun 10 '15 at 18:10
  • 1
    I assume that if the bind fails, you can then try to connect to the socket to see if an existing process is listening on it; if that fails, you could delete the socket "file" and then perform the bind again. But I don't think there's any way to do this atomically. – davmac Jun 11 '15 at 08:25
  • 1
    Yeah, that's what I ended up doing: on an `EADDRINUSE bind()` failure, try to `connect()`; if I get `ECONNREFUSED`, `unlink()` the socket file and try again. If `connect()` succeeds, gracefully exit to preserve the single instance. – Urhixidur Jun 15 '15 at 13:23