
I'm using socket_server from this tutorial and the following code for a client and server:

Server:

-module(echo_server).
-export([start/0, loop/1]).

% echo_server specific code
start() ->
    spawn(socket_server, start, [?MODULE, 7000, {?MODULE, loop}]).

loop(Socket) ->
    %% passive-mode receive; length 0 returns whatever bytes are available
    case gen_tcp:recv(Socket, 0) of
        {ok, Message} ->
            Msg = binary_to_term(Message),
            case Msg of
                start ->
                    io:format("Got start message on socket ~p.~n", [Socket]),
                    send_count(Socket, 10),
                    gen_tcp:close(Socket);
                Other ->
                    io:format("Got message on socket ~p: ~p~n",
                              [Socket, Other])
            end;
        {error, closed} ->
            io:format("Got closed message on socket ~p.~n", [Socket]),
            ok;
        Error ->
            io:format("Got bad message: ~p on socket ~p.~n", [Error, Socket])
    end.

send_count(_Socket, 0) ->
    ok;
send_count(Socket, Num) ->
    io:format("Sending ~p to ~p.~n", [Num, Socket]),
    gen_tcp:send(Socket, term_to_binary(Num)),
    send_count(Socket, Num - 1).

Client:

-module(echo_client).
-export([start/0, do_stuff/0]).


send(Socket, Msg) ->
    gen_tcp:send(Socket, term_to_binary(Msg)).

start() ->
    dbg:tracer(),
    Pid = spawn(?MODULE, do_stuff, []),
    dbg:p(Pid, r).  % 'r' traces every message the client process receives

do_stuff() ->
    case gen_tcp:connect("localhost", 7000, [binary, {packet, 0}]) of
        {ok, Socket} ->
            send(Socket, start),
            rx_loop(Socket);
        Error ->
            io:format("Error connecting to server: ~p~n", [Error])
    end.

rx_loop(Socket) ->
    receive
        {tcp, Socket, Message} ->
            Msg = binary_to_term(Message),
            io:format("Received message: ~p~n", [Msg]),
            rx_loop(Socket)
    after 5000 ->
            %% no data for five seconds; drain stragglers and wait for the close
            finish_loop(Socket)
    end.

finish_loop(Socket) ->
    receive
        {tcp, Socket, Message} ->
            Msg = binary_to_term(Message),
            io:format("Received message: ~p~n", [Msg]),
            rx_loop(Socket);
        {tcp_closed, Socket} ->
            io:format("Server terminated connection.~n"),
            exit(normal);
        Error ->
            io:format("Received bad message: ~p~n", [Error]),
            rx_loop(Socket)
    end.

I'm invoking echo_server:start() and echo_client:start() from different shells on the same system, in that order. Here's what I see:

The server seems to work just fine.

1> echo_server:start().
<0.39.0>
Got start message on socket #Port<0.2041>.
Sending 10 to #Port<0.2041>.
Sending 9 to #Port<0.2041>.
Sending 8 to #Port<0.2041>.
Sending 7 to #Port<0.2041>.
Sending 6 to #Port<0.2041>.
Sending 5 to #Port<0.2041>.
Sending 4 to #Port<0.2041>.
Sending 3 to #Port<0.2041>.
Sending 2 to #Port<0.2041>.
Sending 1 to #Port<0.2041>.

The client doesn't quite get all the messages right:

2> echo_client:start().
{ok,[{matched,nonode@nohost,1}]}
3> (<0.41.0>) << {code_server,{module,gen_tcp}}
(<0.41.0>) << {code_server,{module,inet_tcp}}
(<0.41.0>) << {#Ref<0.0.0.74>,{ok,<0.43.0>}}
(<0.41.0>) << {#Ref<0.0.0.76>,
               {ok,<<4,0,0,0,2,127,0,0,1,127,0,0,1,0,0,0,3,108,111,99,97,108,
                     104,111,115,116,0,105,112,54,45,108,111,99,97,108,104,
                     111,115,116,0,105,112,54,45,108,111,111,112,98,97,99,
                     107,0>>}}
(<0.41.0>) << {inet_async,#Port<0.2058>,0,ok}
(<0.41.0>) << {inet_reply,#Port<0.2058>,ok}
Received message: 10
3> (<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,10>>}
Received message: 9
3> (<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,9>>}
(<0.41.0>) << {io_reply,<0.25.0>,ok}
Received message: 8
Received message: 5
Received message: 4
Received message: 3
Received message: 2
Received message: 1
3> (<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,8,131,97,7,131,97,6>>} %% <---This guy here
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,5>>}
(<0.41.0>) << timeout
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,4>>}
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,3>>}
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,2>>}
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout
(<0.41.0>) << {tcp,#Port<0.2058>,<<131,97,1>>}
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << {tcp_closed,#Port<0.2058>}
(<0.41.0>) << timeout
Server terminated connection.
3> (<0.41.0>) << timeout
(<0.41.0>) << {io_reply,<0.25.0>,ok}
(<0.41.0>) << timeout

If I look at the network traffic on lo, I see nice clean PSH/ACK pairs for each number counting down. The line I pointed at above shows two packets showing up in a single message: 7 and 6. Those came across the network as two separate TCP packets. Anyone have any idea why they're being smushed together or how to un-smush them?

nmichaels

1 Answer


Why they're being "smushed" at the receiving end: Because TCP is a streaming protocol, and there's no requirement for send/recv calls to have a 1-1 correspondence with network packets (even if they arrive that way over the wire).

How to "un-smush" them: Either change your TCP protocol to include a message delimiter, so you can extract messages from the stream without needing to know where the packet boundaries were; or use UDP instead of TCP.

Jim Lewis
  • Wow, that's new. I'd much rather not use UDP, since then I'd have to do all the nice things TCP does myself (like guaranteeing in-order delivery). Can you be more specific about how to get a message delimiter in TCP? Is there something built into gen_tcp that I can use, or do I have to roll my own? – nmichaels Nov 17 '10 at 22:06
  • @Nathon: I don't know Erlang, so I can't speak to the capabilities of gen_tcp. You'll almost certainly have to manage some sort of buffer for the messages you're reading from the TCP layer, then have a way of detecting when a complete message is available. If your messages are all the same length, you're almost done! It's also common to reserve the first few bytes of a message as a message length field. Or if that's not convenient on the sending side, you can define a sequence of bytes that won't appear in your data stream, then look for that sequence to detect when a full message is ready. – Jim Lewis Nov 17 '10 at 22:15
  • Ahah! The {packet, N} option uses the first N bytes of the packet as a length field. There's a long list of options for that field in http://www.erlang.org/doc/man/inet.html#setopts-2 – nmichaels Nov 17 '10 at 22:37
  • {packet, N} is a fast way to fix the problem. A more general path is to request coding of erlang terms directly or similar. – I GIVE CRAP ANSWERS Nov 18 '10 at 01:08
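
To make the {packet, N} fix concrete, a sketch (assuming socket_server lets you pass socket options through to gen_tcp:listen; connect_framed and listen_framed are hypothetical names): with {packet, 4} on both ends, gen_tcp prepends a 4-byte big-endian length header on every send and strips it on receive, so each send shows up as exactly one {tcp, Socket, Binary} message or gen_tcp:recv/2 result on the other side.

%% hypothetical client-side connect with automatic length framing
connect_framed() ->
    gen_tcp:connect("localhost", 7000, [binary, {packet, 4}]).

%% hypothetical server-side listen; accepted sockets inherit the option
listen_framed() ->
    gen_tcp:listen(7000, [binary, {packet, 4}, {active, false}]).

With this in place, the rx_loop above would receive <<131,97,7>> and <<131,97,6>> as two separate {tcp, ...} messages, however the bytes were split or coalesced on the wire, and term_to_binary/binary_to_term need no changes.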