0

I'm taking a course in OS and completely new to linux. I'm supposed to implement a shell program which supports: Running foreground/background processes, piping 2 commands only.

Assuming there is a main function which runs in an infinite loop and parses input commands to produce {char** arglist} correctly - I am to implement a function process_arglist to do the command.

I'm having a few problems which I can't address the source.

First, after performing a single pipe command, for example ls -l | less, any further command will not print.

Second, when i run multiple processes in the background and start playing with ps, kill commands to see zombie process behavior- I see that I'm having zombie processes stacked on the process list. I wish to prevent having zombies as soon as possible.

What am I doing wrong?

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <signal.h>

#define PRINT_ERROR fprintf(stderr, "%s\n", strerror(errno)) /*prints error message to stderr according to current errno*/

int wait_confirm = 1; /* should parent wait for child or not */
int pipe_confirm = 0; /* does the command contain pipe symbol */
int pipe_index; /* which index in arglist is the pipe symbol */




void do_pipe(char** arglist, int pipe_index){
    int fd[2];
    int exit_code;
    pid_t writer_pid, reader_pid;
    arglist[pipe_index] = NULL;
    if(pipe(fd)<0){
        PRINT_ERROR;
        exit(1);
    }
    if ((writer_pid = fork()) < 0){
        PRINT_ERROR;
        exit(1);
    }
    else if (writer_pid == 0){ /* Lefthand-Side of pipe enters here */
        dup2(fd[1], STDOUT_FILENO);
        close(fd[0]);
        close(fd[1]);
        execvp(arglist[0], arglist);
        PRINT_ERROR; /* we get here if an error occured on execvp */
        exit(1);
    }
    else { /* parent enters here */
        if ((reader_pid = fork()) < 0){
            PRINT_ERROR;
            exit(1);
        }
        else if (reader_pid == 0){ /*Righthand-side of pipe enters here */
            dup2(fd[0], STDIN_FILENO);
            close(fd[1]);
            close(fd[0]);
            arglist += pipe_index+1; /* arglist points to the command on the right of '|' */
            execvp(arglist[0], arglist);
            PRINT_ERROR;
            exit(1);
        }
        else{ /*parent enters here*/
            close(fd[0]);
            close(fd[1]);
            waitpid(reader_pid, &exit_code, 0); /* wait for second command to complete */
        }
    }
}


void prevent_zombies(int signum){
    wait(NULL);
}

// arglist - a list of char* arguments (words) provided by the user
// it contains count+1 items, where the last item (arglist[count]) and *only* the last is NULL
// RETURNS - 1 if should continue, 0 otherwise
int process_arglist(int count, char** arglist){
    int i=0;
    int exit_code;
    /* Set arglist appropriately so we can pass it to execvp without '&', if exists. */
    while(arglist[i]!=NULL){
        if (strcmp(arglist[i], "&")==0){
            wait_confirm = 0;
            arglist[i] = NULL;
        }
        else if (strcmp(arglist[i], "|") == 0){
            pipe_confirm = 1;
            pipe_index = i;
        }
        i++;
    }
    if(pipe_confirm){ /* command is indeed a pipe command */
        do_pipe(arglist, pipe_index);
    }
    else {
        int pid = fork();
        if (pid==0){ /* child enters here */
            execvp(arglist[0], arglist);
            PRINT_ERROR;
        }
        else{ /*parent enters here*/
            if(wait_confirm){
                wait(&exit_code);
            }
            else{
                signal(SIGCHLD, prevent_zombies);
            }
        }
    }
    return 1;
}
Tamir Shalev
  • 161
  • 6
  • Using global variables for `pipe_index`, `pipe_confirm` and `wait_confirm` is bad. You have to reset them to known states at the start of each call to `process_arglist()`. Also, the `count` argument to `process_arglist()` is unused; you may as well omit it. These probably aren't all the problems you've got, but they're certainly some of them. – Jonathan Leffler Apr 12 '20 at 14:43
  • I see what you mean, thank you. Could you be more specific to how this is related to zombies stacking up and missing prints after performing a pipe command? – Tamir Shalev Apr 12 '20 at 15:46

1 Answers1

0

Given that you've not provided an MCVE (Minimal, Complete, Verifiable Example) (or MRE or whatever name SO now uses) or an SSCCE (Short, Self-Contained, Correct Example), I've had to create a main() function to run my adaptation of your code. It also makes minimal use of the error reporting code available in my SOQ (Stack Overflow Questions) repository on GitHub as files stderr.c and stderr.h in the src/libsoq sub-directory.

#define _GNU_SOURCE
#include "stderr.h"
#include <assert.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

#define EXEC_ERROR(cmd)   fprintf(stderr, "failed to execute %s (%d: %s)\n", cmd, errno, strerror(errno))
#define PRINT_ERROR(func) fprintf(stderr, "function %s() failed (%d: %s)\n", func, errno, strerror(errno))

static int wait_confirm = 1;
static int pipe_confirm = 0;
static int pipe_index;

static_assert(sizeof(pid_t) == sizeof(int), "sizeof(pid_t) != sizeof(int) - redo format strings");

static void print_command(int cmdnum, char **argv)
{
    fprintf(stderr, "%d: command %d:", getpid(), cmdnum + 1);
    while (*argv != NULL)
        fprintf(stderr, " %s", *argv++);
    putc('\n', stderr);
}

static void do_pipe(char **arglist, int pipe_index)
{
    int fd[2];
    pid_t writer_pid, reader_pid;
    arglist[pipe_index] = NULL;
    if (pipe(fd) < 0)
    {
        PRINT_ERROR("pipe");
        exit(1);
    }
    if ((writer_pid = fork()) < 0)
    {
        PRINT_ERROR("fork - writer");
        exit(1);
    }
    else if (writer_pid == 0)
    {
        dup2(fd[1], STDOUT_FILENO);
        close(fd[0]);
        close(fd[1]);
        execvp(arglist[0], arglist);
        EXEC_ERROR(arglist[0]);
        exit(1);
    }
    else if ((reader_pid = fork()) < 0)
    {
        PRINT_ERROR("fork - reader");
        exit(1);
    }
    else if (reader_pid == 0)
    {
        dup2(fd[0], STDIN_FILENO);
        close(fd[1]);
        close(fd[0]);
        arglist += pipe_index + 1;
        execvp(arglist[0], arglist);
        EXEC_ERROR(arglist[0]);
        exit(1);
    }
    else
    {
        close(fd[0]);
        close(fd[1]);
        fprintf(stderr, "%d: writer %d, reader %d\n", getpid(), writer_pid, reader_pid);
        int status = 0xFFFF;
        int corpse = waitpid(reader_pid, &status, 0);
        fprintf(stderr, "%d: reader %d exited with status 0x%.4X\n", getpid(), corpse, status);
    }
}

static void dec_str(char *target, int value, int width)
{
    char *pos = target + width;
    *--pos = value % 10 + '0';
    value /= 10;
    while (pos > target && value > 0)
    {
        *--pos = value % 10 + '0';
        value /= 10;
    }
    while (pos > target)
        *--pos = ' ';
}

static void hex_str(char *target, int value, int width)
{
    char *pos = target + width;
    while (pos > target)
    {
        *--pos = "0123456789ABCDEF"[value % 16];
        value /= 16;
    }
}

static void prevent_zombies(int signum)
{
    int status = 0xFFFF;
    int corpse = wait(&status);
    char message[] = "XXXXX: PID XXXXX exited with status 0xXXXX (signal XX)\n";
    dec_str(&message[ 0], getpid(), 5);
    dec_str(&message[11], corpse,   5);
    hex_str(&message[38], status,   4);
    dec_str(&message[51], signum,   2);
    write(2, message, strlen(message));
    signal(signum, SIG_DFL);
}

static void process_arglist(char **arglist)
{
    wait_confirm = 1;
    pipe_confirm = 0;

    for (int i = 0; arglist[i] != NULL; i++)
    {
        if (strcmp(arglist[i], "&") == 0)
        {
            wait_confirm = 0;
            arglist[i] = NULL;
        }
        else if (strcmp(arglist[i], "|") == 0)
        {
            pipe_confirm = 1;
            pipe_index = i;
        }
    }

    int pid;
    if (pipe_confirm)
    {
        do_pipe(arglist, pipe_index);
    }
    else if ((pid = fork()) < 0)
    {
        PRINT_ERROR("fork");
    }
    else if (pid == 0)
    {
        execvp(arglist[0], arglist);
        EXEC_ERROR(arglist[0]);
        exit(1);
    }
    else if (wait_confirm)
    {
        fprintf(stderr, "%d: child %d (%s) launched\n", getpid(), pid, arglist[0]);
        int status = 0xFFFF;
        int corpse = waitpid(pid, &status, 0);
        fprintf(stderr, "%d: child %d (%s) exited with status 0x%.4X\n", getpid(), corpse, arglist[0], status);
    }
    else
    {
        fprintf(stderr, "%d: child %d running in background\n", getpid(), pid);
        signal(SIGCHLD, prevent_zombies);
    }
}

static void collect_zombies(void)
{
    int status;
    int corpse;
    while ((corpse = waitpid(-1, &status, WNOHANG)) > 0)
        fprintf(stderr, "%d: zombie %d exited with status 0x%.4X\n", getpid(), corpse, status);
}

int main(int argc, char **argv)
{
    err_setarg0(argv[0]);
    int no_zombies = 0;
    /* Can't reuse commands because process_arglist() modifies them */
    char *tty    = ttyname(0);
    size_t srclen = strlen(err_getarg0()) + sizeof(".c");
    char *source = malloc(srclen);
    if (source == NULL)
        err_syserr("failed to allocate %zu bytes of memory: ", srclen);
    strcpy(source, err_getarg0());
    strcat(source, ".c");

    char *cmd1[] = { "ls", "-l", source, "|", "cat", NULL };
    char *cmd2[] = { "ps", "-ft", tty, NULL };
    char *cmd3[] = { "ls", "-l", source, "|", "cat", NULL };
    char *cmd4[] = { "ps", "-ft", tty, "&", NULL };
    char *cmd5[] = { "sleep", "5", NULL };
    char *cmd6[] = { "ls", "-l", source, "|", "cat", NULL };
    char *cmd7[] = { "ps", "-ft", tty, NULL };
    char **cmds[] = { cmd1, cmd2, cmd3, cmd4, cmd5, cmd6, cmd7, NULL };

    if (argc != 1)
        no_zombies = 1;
    fprintf(stderr, "%d: %s collecting zombies\n", getpid(), (no_zombies ? "is" : "is not"));

    for (int i = 0; cmds[i] != NULL; i++)
    {
        print_command(i, cmds[i]);
        process_arglist(cmds[i]);
        if (no_zombies)
            collect_zombies();
    }
    return 0;
}

I improved the amount of printed information — when debugging shells or multi-process processes, make copious use of printing and ensure you include the current PID in most lines of output. I split the uses of PRINT_ERROR, and I prefer (and now found it much more convenient use) function-like macros over object-like macros that actually do work.

The print_command function prints a sequence of arguments — to make sure I'm seeing the correct commands.

One good thing was that you made sure you closed enough file descriptors.

The details of the do_pipe() function is mostly unchanged, except for reporting which processes were created, and the status of the second process.

The functions dec_str() and hex_str() are signal-safe and are used from the signal handler (prevent_zombies()) to report on the process that died without breaking the rules POSIX specifies of which functions can be called inside signal handlers. The dec_str() function right justifies and blank pads a number; the hex_str() function right justifies but zero pads a number. The prevent_zombies() function reports on the signal caught and the child that died and its status. It also resets the signal handling for SIGCHLD back to the default. This works OK here. You should experiment without that; it changes the behaviour.

One key point in process_arglist() is that it resets the global variables wait_confirm and pipe_confirm to the default values. Those variables should really not be global (I made them static; there's only one source file here, so nothing except main() needs to be accessible outside it). The analyzing code is changed to a for loop to localize i.

The collect_zombies() function can be called to collect any zombie processes; it uses the WNOHANG option so that it doesn't wait for a process to die — it returns if there's no process already dead.

The main() function does some setup work. The err_setarg0() function records the process name — it would be used by err_syserr() called after detecting that malloc() has failed. The code assumes that the program you run is derived from the source file name. My versions were in sh47.c and sh97.c, so err_getarg0() reports sh47 or sh97; this is used to create sh47.c or sh97.c.

Because process_arglist() modifies the pointers in the argument list it is passed, the commands cannot be reused. If the arguments were allocated separately, process_arglist() would be leaking memory, too. Also, because there are lots of processes in lots of windows, I make sure the ps command only reports those in the current window by using ttyname() to get the terminal name. Similarly, I only list the source code for the program — not the 200+ other files in the directory.

The program then iterates over the list of commands, printing and executing each command. Depending on whether there were any command line arguments, it might harvest any zombies. It is very primitive argument handling; a 'real' shell would using getopt() or a similar function to control argument handling.

Example run — not collecting zombies

21013: is not collecting zombies
21013: command 1: ls -l sh97.c | cat
21013: writer 21014, reader 21015
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21013: reader 21015 exited with status 0x0000
21013: command 2: ps -ft /dev/ttys000
21013: child 21016 (ps) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21013   822   0  1:14PM ttys000    0:00.01 sh97
  501 21014 21013   0  1:14PM ttys000    0:00.00 (ls)
    0 21016 21013   0  1:14PM ttys000    0:00.00 ps -ft /dev/ttys000
21013: child 21016 (ps) exited with status 0x0000
21013: command 3: ls -l sh97.c | cat
21013: writer 21017, reader 21018
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21013: reader 21018 exited with status 0x0000
21013: command 4: ps -ft /dev/ttys000 &
21013: child 21019 running in background
21013: command 5: sleep 5
21013: child 21020 (sleep) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21013   822   0  1:14PM ttys000    0:00.01 sh97
  501 21014 21013   0  1:14PM ttys000    0:00.00 (ls)
  501 21017 21013   0  1:14PM ttys000    0:00.00 (ls)
    0 21019 21013   0  1:14PM ttys000    0:00.00 ps -ft /dev/ttys000
  501 21020 21013   0  1:14PM ttys000    0:00.00 sleep 5
21013: PID 21019 exited with status 0x0000 (signal 20)
21013: child 21020 (sleep) exited with status 0x0000
21013: command 6: ls -l sh97.c | cat
21013: writer 21021, reader 21022
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21013: reader 21022 exited with status 0x0000
21013: command 7: ps -ft /dev/ttys000
21013: child 21023 (ps) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21013   822   0  1:14PM ttys000    0:00.01 sh97
  501 21014 21013   0  1:14PM ttys000    0:00.00 (ls)
  501 21017 21013   0  1:14PM ttys000    0:00.00 (ls)
  501 21021 21013   0  1:14PM ttys000    0:00.00 (ls)
    0 21023 21013   0  1:14PM ttys000    0:00.00 ps -ft /dev/ttys000
21013: child 21023 (ps) exited with status 0x0000

Example run - collecting zombies

21033: is collecting zombies
21033: command 1: ls -l sh97.c | cat
21033: writer 21034, reader 21035
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21033: reader 21035 exited with status 0x0000
21033: zombie 21034 exited with status 0x0000
21033: command 2: ps -ft /dev/ttys000
21033: child 21036 (ps) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21033   822   0  1:17PM ttys000    0:00.01 sh97 1
    0 21036 21033   0  1:17PM ttys000    0:00.00 ps -ft /dev/ttys000
21033: child 21036 (ps) exited with status 0x0000
21033: command 3: ls -l sh97.c | cat
21033: writer 21037, reader 21038
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21033: reader 21038 exited with status 0x0000
21033: zombie 21037 exited with status 0x0000
21033: command 4: ps -ft /dev/ttys000 &
21033: child 21039 running in background
21033: command 5: sleep 5
21033: child 21040 (sleep) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21033   822   0  1:17PM ttys000    0:00.01 sh97 1
    0 21039 21033   0  1:17PM ttys000    0:00.00 ps -ft /dev/ttys000
  501 21040 21033   0  1:17PM ttys000    0:00.00 sleep 5
21033: PID 21039 exited with status 0x0000 (signal 20)
21033: child 21040 (sleep) exited with status 0x0000
21033: command 6: ls -l sh97.c | cat
21033: writer 21041, reader 21042
-rw-r--r--  1 jonathanleffler  staff  5494 Apr 12 12:54 sh97.c
21033: reader 21042 exited with status 0x0000
21033: zombie 21041 exited with status 0x0000
21033: command 7: ps -ft /dev/ttys000
21033: child 21043 (ps) launched
  UID   PID  PPID   C STIME   TTY           TIME CMD
    0   820   765   0 24Mar20 ttys000    0:00.03 login -pf jonathanleffler
  501   822   820   0 24Mar20 ttys000    0:00.61 -bash
  501 21033   822   0  1:17PM ttys000    0:00.01 sh97 1
    0 21043 21033   0  1:17PM ttys000    0:00.00 ps -ft /dev/ttys000
21033: child 21043 (ps) exited with status 0x0000

Comparison

In the first example, you can see the zombies (ls); in the second, there are no such processes. The sleep 5 command is there so that the output of the background ps does not get mixed with the subsequent ls and ps — try removing the sleep and see what happens.

As you can see, there's no problem here with running a sequence of commands, and no signs of lost output. I'm not sure what was going wrong in your version, but so much of your code was not provided that it's very possible that the trouble was not in what you showed. There are many possibilities. With that said, I think one factor in your problem was the unreset global variables. Another may be that you weren't resetting the processing after the background processes finished. Note that if your background processes don't finish tidily in the sequence you launch them (e.g. a series of sleep commands of different durations), then the background processing will not work as intended.

It really is worth monitoring your code as thoroughly as this, at least until you're reasonably confident it is working (and it is worth keeping the monitoring code available so that you can reactivate it if you run into problems after a change). You could look at #define macro for debug printing in C for ideas on how to make the debug code configurable at compile-time and run-time. If you do that, you might well need some proper argument processing to control the debug (think sh -x on steroids).

Jonathan Leffler
  • 730,956
  • 141
  • 904
  • 1,278
  • I will take my time to observe and understand what you did. Thank you very much it is extremely elaborated and informative! I sincerely appreciate your time and work. – Tamir Shalev Apr 13 '20 at 07:47