3

I'm working on a custom shell that can handle multiple pipes. But every time I execute a new pipeline and check the process with ls -l /proc/pid/fd I get something like in the picture below and the list keeps expanding with every new pipeline executed:

screenshot of terminal

Question: Is this considered as a fd leak? And how do I fix it?

Here's a code snippet for my pipeline execution:

enum PIPES {READ, WRITE};

void execute_pipeline(char*** pipeline)
{
    int fd[2];
    int fd_backup = 0;
    pid_t child_pid;

    while (*pipeline != '\0')
    {
        pipe(fd);
        child_pid = fork();

        if(child_pid == -1)
        {
            perror("fork");
            exit(1);
        }
        else if(child_pid == 0)
        {
            dup2(fd_backup, 0);// (old, new)
            close(fd[READ]);

            if(*(pipeline + 1) != '\0')
            {
                dup2(fd[WRITE], 1);
            }
            execvp((*pipeline)[0], *pipeline);
            exit(1);
        }
        else// Parent process
        {
            wait(NULL);
            close(fd[WRITE]);
            fd_backup = fd[READ];
            pipeline++;
        }
    }
}

EDIT

An example how to call execute_pipeline:

char *ls[] = {"ls", "-l", NULL};
char *sort[] = {"sort", "-r", NULL};
char *head[] = {"head", "-n", "3", NULL};
char **pipeline[] = {ls, sort, head, NULL};

execute_pipeline(pipeline);
RendoJack
  • 366
  • 1
  • 5
  • 17
  • 1
    Although `*(pipeline +1)` is just another way of writing `pipeline[1]`, the phrase `(*pipeline)[1]` does not mean anything. (that I am aware of.) It does not compile. Did you mean to do this? The subscripted value is not an array, pointer or vector. (in this context) – ryyker Oct 15 '18 at 19:16
  • 2
    Can you post some [MCVE] to illustrate how `execute_pipeline` is called? – Mathieu Oct 15 '18 at 19:44
  • Added example to my post. – RendoJack Oct 15 '18 at 19:54
  • Seeing `char*** pipeline` is a sign that something here is really, really odd. `*pipeline == '\0'` is likewise an error, you can't compare `char**` to a zero character. Maybe you mean `NULL`? – tadman Oct 15 '18 at 19:57
  • 1
    A simple struct to encapsulate each command would be better than slinging raw `char**` arrays. – tadman Oct 15 '18 at 19:58
  • 1
    @kamil: none of that is correct. Close means close. When you close an fd, it is closed. That doesn't mean that the thing that was open will disappear, since it might be opened by a different process or with a different fd. But the fd is closed and open will always give you the lowest available fd, so it will get reusef. – rici Oct 16 '18 at 02:33

2 Answers2

3

As tadman pointed out, it is easier to use a command struct to pass things around.

We can't [well, we could but shouldn't] do a wait [in the parent] during pipe construction. That has to be a separate loop later. We would hang the parent after the first child is created.

If the first child had a large amount of output, the kernel pipe buffers might fill up and the first child would block. But, since the second child has not been created, there is nothing to read/drain the first child's output and unblock it.

Also, it is important to close the pipe units after doing dup2 and ensure the previous pipe stage units are closed in the parent.

Here's a refactored version that does all that.

As to your original issue with file descriptor leakage, I think I fixed that by adding some more close calls. The program has some self verification code on this:

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
#include <dirent.h>
#include <sys/wait.h>

#define FREEME(ptr_) \
    do { \
        if (ptr_ == NULL) \
            break; \
        free(ptr_); \
        ptr_ = NULL; \
    } while (0)

#define CLOSEME(fd_) \
    do { \
        if (fd_ < 0) \
            break; \
        close(fd_); \
        fd_ = -1; \
    } while (0)

// command control
typedef struct {
    unsigned int cmd_opt;               // options
    int cmd_cldno;                      // child number

    char *cmd_buf;                      // command buffer
    int cmd_argc;                       // argument count
    char **cmd_argv;                    // arguments

    int cmd_pipe[2];                    // pipe units
    pid_t cmd_pid;                      // child pid number
    int cmd_status;                     // child status
} cmd_t;

#define CMD_FIRST       (1u << 0)
#define CMD_LAST        (1u << 1)

char linebuf[1000];
int cmdcount;
cmd_t *cmdlist;

int opt_d;
int opt_l;

#define dbg(fmt_...) \
    do { \
        if (opt_d) \
            printf(fmt_); \
    } while (0)

// show open fd's
void
fdshow1(int cldid)
{
    char buf[100];

    fprintf(stderr,"CLD: %d\n",cldid);
    sprintf(buf,"ls -l /proc/%d/fd 1>&2",getpid());
    system(buf);
}

// show open fd's
void
fdshow2(int cldid)
{
    char dir[100];
    char lnkfm[1000];
    char lnkto[1000];
    int len;
    DIR *xf;
    struct dirent *ent;
    char *bp;
    char obuf[1000];

    sprintf(dir,"/proc/%d/fd",getpid());
    xf = opendir(dir);

    bp = obuf;
    bp += sprintf(bp,"%d:",cldid);

    while (1) {
        ent = readdir(xf);
        if (ent == NULL)
            break;

        if (strcmp(ent->d_name,".") == 0)
            continue;
        if (strcmp(ent->d_name,"..") == 0)
            continue;

        sprintf(lnkfm,"%s/%s",dir,ent->d_name);
        len = readlink(lnkfm,lnkto,sizeof(lnkto));
        lnkto[len] = 0;

        if (strstr(lnkto,"pipe") != 0)
            bp += sprintf(bp," %s-->%s",ent->d_name,lnkto);

        switch (ent->d_type) {
        case DT_FIFO:
            break;
        }
    }

    bp += sprintf(bp,"\n");
    fputs(obuf,stderr);
    fflush(stderr);

    closedir(xf);
}

// show open fd's
void
fdshow(int cldid)
{

    fdshow2(cldid);
}

// pipeadd -- add single command to pipe
void
pipeadd(char *buf)
{
    char *cp;
    char *bp;
    char *sv;
    cmd_t *cmd;

    dbg("pipeadd: buf='%s'\n",buf);

    cmdlist = realloc(cmdlist,(cmdcount + 1) * sizeof(cmd_t));

    cmd = &cmdlist[cmdcount];
    memset(cmd,0,sizeof(cmd_t));
    cmd->cmd_pipe[0] = -1;
    cmd->cmd_pipe[1] = -1;

    cmd->cmd_cldno = cmdcount;
    ++cmdcount;

    bp = buf;
    while (1) {
        cp = strtok_r(bp," \t",&sv);
        bp = NULL;
        if (cp == NULL)
            break;

        cmd->cmd_argv = realloc(cmd->cmd_argv,
            (cmd->cmd_argc + 2) * sizeof(char **));

        cmd->cmd_argv[cmd->cmd_argc + 0] = cp;
        cmd->cmd_argv[cmd->cmd_argc + 1] = NULL;

        cmd->cmd_argc += 1;
    }
}

// pipesplit -- read in and split up command
void
pipesplit(void)
{
    char *cp;
    char *bp;
    char *sv;
    cmd_t *cmd;

    printf("> ");
    fflush(stdout);

    fgets(linebuf,sizeof(linebuf),stdin);

    cp = strchr(linebuf,'\n');
    if (cp != NULL)
        *cp = 0;

    bp = linebuf;
    while (1) {
        cp = strtok_r(bp,"|",&sv);
        bp = NULL;

        if (cp == NULL)
            break;

        pipeadd(cp);
    }

    cmd = &cmdlist[0];
    cmd->cmd_opt |= CMD_FIRST;

    cmd = &cmdlist[cmdcount - 1];
    cmd->cmd_opt |= CMD_LAST;

    if (opt_d) {
        for (cmd_t *cmd = cmdlist;  cmd < &cmdlist[cmdcount];  ++cmd) {
            dbg("%d:",cmd->cmd_cldno);
            for (int argc = 0;  argc < cmd->cmd_argc;  ++argc)
                dbg(" '%s'",cmd->cmd_argv[argc]);
            dbg("\n");
        }
    }
}

// pipefork -- fork elements of pipe
void
pipefork(void)
{
    cmd_t *cmd;
    int fdprev = -1;
    int fdpipe[2] = { -1, -1 };

    for (cmd = cmdlist;  cmd < &cmdlist[cmdcount];  ++cmd) {
        // both parent and child should close output side of previous pipe
        CLOSEME(fdpipe[1]);

        // create a new pipe for the output of the current child
        if (cmd->cmd_opt & CMD_LAST) {
            fdpipe[0] = -1;
            fdpipe[1] = -1;
        }
        else
            pipe(fdpipe);

        cmd->cmd_pid = fork();
        if (cmd->cmd_pid < 0) {
            printf("pipefork: fork fail -- %s\n",strerror(errno));
            exit(1);
        }

        // parent the input side for the next pipe stage
        if (cmd->cmd_pid != 0) {
            CLOSEME(fdprev);
            fdprev = fdpipe[0];
            continue;
        }

        // connect up our input to previous pipe stage's output
        if (fdprev >= 0) {
            dup2(fdprev,0);
            CLOSEME(fdprev);
        }

        // connect output side of our pipe to stdout
        if (fdpipe[1] >= 0) {
            dup2(fdpipe[1],1);
            CLOSEME(fdpipe[1]);
        }

        // child doesn't care about reading its own output
        CLOSEME(fdpipe[0]);

        if (opt_l)
            fdshow(cmd->cmd_cldno);

        // off we go ...
        execvp(cmd->cmd_argv[0],cmd->cmd_argv);
    }

    CLOSEME(fdpipe[0]);
    CLOSEME(fdpipe[1]);

    if (opt_l)
        fdshow(-1);
}

// pipewait -- wait for pipe stages to complete
void
pipewait(void)
{
    pid_t pid;
    int status;
    int donecnt = 0;

    while (donecnt < cmdcount) {
        pid = waitpid(0,&status,0);
        if (pid < 0)
            break;

        for (cmd_t *cmd = cmdlist;  cmd < &cmdlist[cmdcount];  ++cmd) {
            if (pid == cmd->cmd_pid) {
                cmd->cmd_status = status;
                ++donecnt;
                break;
            }
        }
    }
}

// pipeclean -- free all storage
void
pipeclean(void)
{

    for (cmd_t *cmd = cmdlist;  cmd < &cmdlist[cmdcount];  ++cmd)
        FREEME(cmd->cmd_argv);
    FREEME(cmdlist);
    cmdcount = 0;
}

// main -- main program
int
main(int argc,char **argv)
{
    char *cp;

    --argc;
    ++argv;

    for (;  argc > 0;  --argc, ++argv) {
        cp = *argv;
        if (*cp != '-')
            break;

        switch (cp[1]) {
        case 'd':
            opt_d = ! opt_d;
            break;

        case 'l':
            opt_l = ! opt_l;
            break;

        default:
            break;
        }
    }

    while (1) {
        pipesplit();
        pipefork();
        pipewait();
        pipeclean();
    }

    return 0;
}
Craig Estey
  • 30,627
  • 4
  • 24
  • 48
2

Let's be accurate regarding the file descriptors and bear in mind that during fork and execvp file descriptors are inherited by child processes unless marked as w/ CLOEXEC flag. Check the man pages:

fork(2): The child inherits copies of the parent's set of open file descriptors. Each file descriptor in the child refers to the same open file description (see open(2)) as the corresponding file descriptor in the parent.

open(2): By default, the new file descriptor is set to remain open across an execve(2) (i.e., the FD_CLOEXEC file descriptor flag described in fcntl(2) is initially disabled); the O_CLOEXEC flag, described below, can be used to change this default. The file offset is set to the beginning of the file (see lseek(2)).

But this behaviour, I supposed, exactly what you were relying on by calling fork after pipe...

No other words, lets' draw this:

             stdin, stdout                                                                   
                   /\                                                                        
                  /  \                                                                       
                 /    \                                                                      
                /      \                                                                     
               / R; W;  \                                                                    
              /          \                                                                   
       Child -            - Parent                                                           
 stdin/out, R(del),W       stdin/out, R(fd_backup), W(del)                                   
                                  /\                                                         
                                 /  \                                                        
                                /    \                                                       
                               /      \                                                      
                              / R1; W1;\                                                     
                             /          \                                                    
                      Child -            - Parent                                            
               stdin/out, R(fd_backup),   stdin, stdout                                      
                            R1(del), W1   R(fd_backup - old);                                
                                          R1(fd_backup - new); W1(del)                       
                                                    / \                                      
                                                   /   \                                     
                                                  /     \                                    
                                                 /R2; W2;\                                   
                                                /         \                                  
                                               /           \                                 
                                        Child -             - Parent                         
                                 stdin, stdout,             stdin, stdout                    
                                R(fd_backup - old),         R (fd_backup - old),             
                                R1(fd_backup - new),        R1 (fd_backup - new),            
                                R2(del),W2                  R2 (fd_backup - newest!),                                                                                            

I hope the picture is self explanatory.

Child processes will die anyway and all their fds will be closed (so no issue with them). But the parent process is left with 3 opened fds and they keep growing with each pipe executed.

vcargats
  • 41
  • 1
  • 2