0

I'm learning process in C so I've decided to make a simple shell, like /bin/sh takes a command or a pipe of commands then execute all of them.

I'm current testing a pipe of commands:

cat moby.txt | tr A-Z a-z | tr -C a-z \n | sed /^$/d | sort | uniq -c | sort -nr | sed 10q

My shell program executes all of them but it pauses at the sed 10q command. If I didn't execute the last command, the shell will return something like this:

myshell$ cat moby.txt | tr A-Z a-z | tr -C a-z \n | sed /^$/d | sort | uniq -c | sort -nr
Process 3213 exited with status 0
Process 3214 exited with status 0
Process 3215 exited with status 0
Process 3216 exited with status 0
Process 3217 exited with status 0
Process 3218 exited with status 0
...
1 abbreviate
1 abatement
1 abate
1 abasement
1 abandonedly
Process 3068 exited with status 0
myshell$

But when I execute the last command as well, the shell will not be returning anything and paused:

myshell$ cat moby.txt | tr A-Z a-z | tr -C a-z \n | sed /^$/d | sort | uniq -c | sort -nr | sed 10q
Process 3213 exited with status 0
Process 3214 exited with status 0
Process 3215 exited with status 0
Process 3216 exited with status 0
Process 3217 exited with status 0
Process 3218 exited with status 0
  14718 the
   6743 of
   6518 and
   4807 a
   4707 to
   4242 in
   3100 that
   2536 it
   2532 his
   2127 i (it should return the message if a process is successfully completed)

Here is the code:

// main
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "shell.h"

#define BUFFER_SIZE 1024

int main(void)
{
    char line_buffer[BUFFER_SIZE];
    char *argv[256];
    int n = 0;
    pid_t pids[30];

    while (1)
    {
        fprintf(stdout, "myshell$");
        fflush(NULL);

        if (!fgets(line_buffer, BUFFER_SIZE, stdin))
            break;

        /* File descriptors - 1 is existing, 0 is not existing */
        int prev = 0;  // previous command
        int first = 1; // first command in the pipe
        int last = 0;  // last command in the pipe

        // Separate commands with character '|'
        char *cmd = line_buffer;
        char *single_cmd = strchr(cmd, '|');

        while (single_cmd != NULL)
        {
            *single_cmd = '\0';
            parse(cmd, argv);

            if (strcmp(argv[0], "exit") == 0)
                exit(0);

            execute(&pids[n++], argv, &prev, &first, &last);
            cmd = single_cmd + 1;
            single_cmd = strchr(cmd, '|');
            first = 0;
        }

        parse(cmd, argv);

        if (argv[0] != NULL && (strcmp(argv[0], "exit") == 0))
            exit(0);

        last = 1;
        execute(&pids[n++], argv, &prev, &first, &last);

        int status;
        for (int i = 0; i < n; i++)
        {
            waitpid(pids[i], &status, 0);

            if (WIFEXITED(status))
            {
                int exit_status = WEXITSTATUS(status);
                fprintf(stdout, "Process %d exited with status %d\n",
                        pids[i], exit_status);
            }
        }

        n = 0;
    }

    return 0;
}
// shell.h
#ifndef _MY_SHELL_H
#define _MY_SHELL_H

#include <sys/types.h>

/**
 * @brief Parse function will parse a given line from user input into 
 * an array that represents the specified command and its arguments
 * 
 * @param line String buffer to parse
 * @param argv Array of tokens
 */
void parse(char *line, char **argv);

/**
 * @brief Execute function will take the array of tokens and execute them
 * using execvp.
 * 
 * @param argv Array of tokens
 */
void execute(pid_t *pid, char **argv, int *prev, int *first, int *last);

#endif // _MY_SHELL_H
// shell.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <sys/types.h>
#include <sys/wait.h>

#include "shell.h"

void parse(char *line, char **argv)
{

    size_t line_length = strlen(line);

    // fgets includes '\n' at the end of line
    if (line[line_length - 1] && line[line_length - 1] != ' ')
        line[line_length - 1] = ' ';

    // Parse the line into tokens
    char *token = strtok(line, " ");

    while (token != NULL)
    {
        // Store those tokens in the argument list
        *argv++ = token;
        // @see http://www.cplusplus.com/reference/cstring/strtok/
        token = strtok(NULL, " ");
    }

    // Bad address error - Indicate the end of the argument list
    *argv = (char *)NULL;
}

void execute(pid_t *pid, char **argv, int *prev, int *first, int *last)
{
    int fd[2];
    pipe(fd);

    *pid = fork();

    if (*pid < 0)
    {
        fprintf(stderr, "ERROR: fork failed. Program exited with 1\n");
        exit(1);
    }
    else if (*pid == 0)
    {
        // First command
        if (*first == 1 && *last == 0 && *prev == 0)
        {
            dup2(fd[1], STDOUT_FILENO);
        }
        // Middle commands
        else if (*first == 0 && *last == 0 && *prev != 0)
        {
            dup2(*prev, STDIN_FILENO);
            dup2(fd[1], STDOUT_FILENO);
        }
        else
        {
            dup2(*prev, STDIN_FILENO);
        }

        int status = execvp(*argv, argv);
        if (status < 0)
        {
            perror("ERROR: process cannot be executed. Program exited with 1");
            exit(1);
        }
    }

    if (*prev != 0)
        close(*prev);

    close(fd[1]);

    if (*last == 1)
        close(fd[0]);

    *prev = fd[0];
}

The reason why I'm confused is that in the pipe of commands, the previous sed /^$/d executes as expected, so I don't think that my shell program doesn't work with sed command. Every comment is appreciated. Thank you!

  • 2
    `execvp` only returns if it fails. If it successfully runs a program, then it never returns. This is by design. – William Pursell Jan 26 '22 at 00:06
  • What I meant by `return` is when a child process is completed with the associated command, the main shell will capture that child process's status and return the message as the examples I've given above – Richard H. Nguyen Jan 26 '22 at 00:10
  • What you're describing sounds like you've left some file descriptors open. – William Pursell Jan 26 '22 at 00:16
  • I thought it was the case at first, but if I replaced the `sed 10q` command with something like `tr a-z A-Z`, the main shell returned the corresponding message and continued; it didn't pause when I used the `sed 10q` command. – Richard H. Nguyen Jan 26 '22 at 00:23

2 Answers2

1

While you've closed the pipes your shell, you haven't closed the pipes in the forked process.

The last process actually does exit, it's the process before it which does not. The last process exits after reading the 10 lines it needs, leaving the process before it to trying to write more lines, but it never gets an error in doing so because it itself still has the read end of the pipe open. So it just hangs there waiting for something to read the data that it is writing.

The fix is to simply close(fd[0]) and close(fd[1]) in every forked process before calling execvp(), so that only the copies you make with dup() remain open.

Note that, while this does make your shell finish the command successfully, it still won't print the exit status of this process because WIFEXITED() evaluates to false. However, WIFSIGNALED() will evaluate to true, and then WTERMSIG() will tell you that it was killed with SIGPIPE.

Coleo
  • 66
  • 4
  • Thanks! Your solution works for me. I'm not sure the last part you mentioned because when I updated according to your answer, my program passed the test case I had mentioned with the correct status. – Richard H. Nguyen Jan 27 '22 at 04:58
  • @Yuuta If you count how many commands you're running, and count how many exit status messages are displayed, you'll see that there's one less. – Coleo Jan 27 '22 at 19:21
  • Can you elaborate on why the process you mentioned won't be evaluated to true instead? I indeed got the less messages and when I traversed back to that process, it returns status of 13. But I couldn't find any reference what that status is about. – Richard H. Nguyen Jan 30 '22 at 04:07
  • @Yuuta `signal.h` has named constants for the signals, so you can `if (signal == SIGPIPE)`, but as for simply converting the 13 to the signal name for display purposes, it doesn't seem like there are any good options. The question linked below goes over a lot of options but exactly what each does and if it's even available seems to vary at lot between platforms and in testing most of them don't work for me. So pilcrow's answer is the one I'd go with if I needed a signal number to name function. https://stackoverflow.com/questions/16509614/signal-number-to-name – Coleo Jan 30 '22 at 16:25
0

execvp is a system call that only returns if it cannot execute the command you ask it to. The reason is that execvp doesn't create a new process (that's the mission of fork()) but it installs the new executable substituting the actual program. The memory of the program running now is returned to the system, and new fresh memory is allocated for the new program. The new program is running under the same process that was running before the execvp system call, so, when you exit, you don't return to the old program, as it was lost.

You need to read a book about unix processes. An old book but that explains it very well, is "The UNIX programming environment" of Rob Pike and Brian Kernighan. Very good explanation from one of the inventors of unix (well, both where somehow involved) and still holds.

Of course, a good reference also is

$ man execvp
...
Luis Colorado
  • 10,974
  • 1
  • 16
  • 31
  • I realized the mistake when I said "`execvp` returns ..." but what I meant was the the status that the main process could capture its child processes after executing the `execvp` function. You can see my above examples. – Richard H. Nguyen Jan 27 '22 at 04:55
  • Ah, ok, see the other answer, from @Coleo, it addresses the point you are failing on. you have two descriptors in a pipe.... the child will receive also two, and has to close one end (the one the father left open has to be closed in the child and viceversa) – Luis Colorado Jan 27 '22 at 10:04