Why does read block on a pipe until the write end is closed?

Question

I'm trying to bolster my understanding of things related to fork, exec, dup, and redirecting stdin/stdout/stderr by writing the following popen-type function:

// main.c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

#define INVALID_FD (-1) // sentinel stored in a Pipe slot that holds no open descriptor

/** Names the two slots of a Pipe array so call sites avoid bare 0/1 indices. */
typedef enum PipeEnd {
  READ_END,   // slot 0
  WRITE_END   // slot 1
} PipeEnd;

typedef int Pipe[2]; // descriptor pair handed to pipe(); index it with PipeEnd

/**
 * Result of popen2(): whether the child was started, its pid, and the three
 * pipes connected to the child's standard streams.
 */
struct popen2_t {
  bool  success;  ///< true when the child process was spawned
  Pipe  stdin;    ///< parent writes stdin[WRITE_END]; the child reads it as its stdin
  Pipe  stdout;   ///< child's stdout feeds stdout[WRITE_END]; parent reads stdout[READ_END]
  Pipe  stderr;   ///< child's stderr feeds stderr[WRITE_END]; parent reads stderr[READ_END]
  pid_t pid;      ///< pid of the child process
};

typedef struct popen2_t popen2_t;

/**
 * Makes descriptor `fd` refer to the `pe` end of pipe `p`, then closes the
 * pipe's own descriptors and marks both slots of `p` as INVALID_FD.
 *
 * A pipe end whose number already equals `fd` is left open: closing it would
 * close the very descriptor this function is meant to install (this can
 * happen when pipe() handed out `fd` itself because it was free).
 *
 * @param p   pipe whose descriptors are consumed; both slots are invalidated.
 * @param pe  which end of `p` should become `fd`.
 * @param fd  target descriptor number, e.g. STDIN_FILENO.
 *
 * A dup2() failure cannot be reported through the void return type; the
 * pipe is closed and invalidated regardless.
 */
static void dupFd( Pipe p, const PipeEnd pe, const int fd ) {
  // dup2( fd, fd ) would be a no-op, so only duplicate when the numbers differ.
  if ( p[pe] != fd ) {
    dup2( p[pe], fd );
  }

  for ( int end = READ_END; end <= WRITE_END; ++end ) {
    // Never close `fd` itself: after the dup2() above it is the installed copy.
    if ( INVALID_FD != p[end] && fd != p[end] ) {
      close( p[end] );
    }
    p[end] = INVALID_FD;
  }
}

/**
 * Spawns `sh -c cmd` as a child process with its stdin, stdout and stderr
 * connected to three freshly created pipes.
 *
 * @param cmd  shell command line to run; NULL is rejected as a failure.
 * @return     On success: `success` is true, `pid` is the child's pid, and
 *             only the parent's ends stay open (stdin[WRITE_END],
 *             stdout[READ_END], stderr[READ_END]); the other three slots are
 *             INVALID_FD. On failure: `success` is false and all six slots
 *             are INVALID_FD with nothing left open.
 *
 * The caller owns the returned descriptors and the child process (it must
 * close() the former and wait for the latter).
 */
popen2_t popen2( const char* cmd ) {
  // Every slot starts out invalid so the cleanup below never closes a
  // descriptor this function did not create (a zero-filled slot would make it
  // close descriptor 0, i.e. the caller's own stdin).
  popen2_t r = {
    false,
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    { INVALID_FD, INVALID_FD },
    0
  };

  if ( NULL == cmd ) { goto end; }

  if ( -1 == pipe( r.stdin ) ) { goto end; }
  if ( -1 == pipe( r.stdout ) ) { goto end; }
  if ( -1 == pipe( r.stderr ) ) { goto end; }

  switch ( (r.pid = fork()) ) {
    case -1: // Error
      goto end;

    case 0: { // Child process
      // Wire the pipes onto the standard descriptors; dupFd() also closes the
      // child's copies of the parent-side ends.
      dupFd( r.stdin, READ_END, STDIN_FILENO );
      dupFd( r.stdout, WRITE_END, STDOUT_FILENO );
      dupFd( r.stderr, WRITE_END, STDERR_FILENO );

      char* argv[] = { "sh", "-c", (char*)cmd, NULL };

      execvp( argv[0], argv );

      // Only reached when execvp() failed. _exit() skips the stdio flush and
      // atexit handlers inherited from the parent, and 127 is the shell's
      // "command could not be executed" status rather than a false success.
      _exit( 127 );
    }

    default: // Parent process
      break;
  }

  // Parent process: drop the ends that belong to the child.
  close( r.stdin[READ_END] );
  r.stdin[READ_END] = INVALID_FD;
  close( r.stdout[WRITE_END] );
  r.stdout[WRITE_END] = INVALID_FD;
  close( r.stderr[WRITE_END] );
  r.stderr[WRITE_END] = INVALID_FD;
  r.success = true;

end:
  if ( ! r.success ) {
    // Release whatever was created before the failure.
    int* const pipes[] = { r.stdin, r.stdout, r.stderr };

    for ( size_t i = 0; i < sizeof pipes / sizeof pipes[0]; ++i ) {
      for ( int e = READ_END; e <= WRITE_END; ++e ) {
        if ( INVALID_FD != pipes[i][e] ) {
          close( pipes[i][e] );
          pipes[i][e] = INVALID_FD;
        }
      }
    }
  }

  return r;
}

int main( int argc, char* argv[] ) {
  popen2_t p = popen2( "./child.out" );

  {
    int status = 0;


    sleep( 2 );

    {
      char buf[1024] = { '\0' };

      read( p.stdout[READ_END], buf, sizeof buf );
      printf( "%s", buf );
    }

    //pid_t wpid = waitpid( p.pid, &status, 0 );
    //return wpid == p.pid && WIFEXITED( status ) ? WEXITSTATUS( status ) : -1;
  }
}

// child.c
#include <stdio.h>
#include <unistd.h>

int main( int argc, char* argv[] ) { // prints its own file:line four times, pausing one second after each
  printf( "%s:%d\n", __FILE__, __LINE__ ); // no fflush(): when stdout is not a terminal this stays in the stdio buffer
  sleep( 1 );
  printf( "%s:%d\n", __FILE__, __LINE__ ); // __LINE__ makes the output depend on this statement's line number
  sleep( 1 );
  printf( "%s:%d\n", __FILE__, __LINE__ );
  sleep( 1 );
  printf( "%s:%d\n", __FILE__, __LINE__ );
  sleep( 1 );
  return 0; // process exit flushes the buffered output
}

Compilation and execution:

$ gcc --version && gcc -g ./child.c -o ./child.out && gcc -g ./main.c && ./a.out
gcc (Debian 6.3.0-18+deb9u1) 6.3.0 20170516
Copyright (C) 2016 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

./child.c:6
./child.c:8
./child.c:10
./child.c:12
$

My question is about the read() - I don't quite grok why the read() seemingly blocks until the child process has completed (thereby closing its end of the pipe).

Is it coincidence? You can see I've tried to "make" the main process do its read in the middle of the child process' execution with the sleep( 2 ) statement.

In total, the child process dumps 50 chars to its (redirected) stdout. Isn't it possible that the main process might do its read() in the middle of the child's execution and read only N of 50 of those chars, and that therefore the main process' printf() wouldn't print all four lines from the child process in its entirety?

(Functionality-wise, everything is fine - my question is to better my understanding of read())

@NateEldredge - I can't say that this is _reasonable_, but I was imagining that a pipe might be like a TCP socket, where if there's _anything_ in the pipe, you could read some subset of it, oblivious to whether the other end was done or not. — StoneThrow, Oct 01 '20 at 23:25
@StoneThrow: That's exactly what does happen. But the child is using `printf` to print, which is fully buffered when standard output is not a terminal. So although the printf completes, `write()` is not actually called until the buffer is flushed, which happens when the child exits. — Nate Eldredge, Oct 01 '20 at 23:27
@NateEldredge - thanks; I'm gonna go experiment with that. I assume, then, that using `write()` or some such non-buffered alternative could trigger my "subset of the whole 50 bytes" scenario. — StoneThrow, Oct 01 '20 at 23:32
@StoneThrow: Yes indeed. In that case, you will see `read` return immediately with however many bytes are available. (You have to check the return value to see how many bytes that is.) If you want more bytes after that, call `read()` again, normally in a loop, and it will block until either some bytes are available or the pipe is closed. — Nate Eldredge, Oct 01 '20 at 23:35
In all normal circumstances you want to keep reading in that manner. As your program stands, the parent will close the pipe before the child is done writing, which will cause the child to be killed with SIGPIPE when it writes some more. — Nate Eldredge, Oct 01 '20 at 23:38
@NateEldredge - You speak the truth! I confirmed your claim with the discussed experiment. Nice! This back-and-forth clarified a bunch of stuff in my head, and I also came to better understand what _buffered_ writing can result in. — StoneThrow, Oct 01 '20 at 23:48

Barmar · Accepted Answer · 2020-10-01T23:29:42.837

3

By default, stdout is fully buffered when it's not writing to a terminal. So nothing is being written to the pipe by your printf() calls in the child until the buffer is flushed. This will happen when the buffer fills (probably 1K or 4K bytes) or the process exits.

You can flush the buffer immediately with fflush(stdout);. Add that after each of your printf() calls and you'll be able to read them in the parent without waiting for the process to exit.

edited Oct 01 '20 at 23:29

answered Oct 01 '20 at 23:21

Barmar

741,623
53
500
612

Thank you also - same response as to NateEldredge - I'm gonna go experiment on the basis of your explanation. – StoneThrow Oct 01 '20 at 23:33
Was able to confirm by experiment - thank you for the explanation! – StoneThrow Oct 01 '20 at 23:49

Why does read block on a pipe until the write end is closed?

1 Answer

Linked