popen()
could be used
The main question says it wants to achieve the equivalent of the shell command:
output=$(echo -n 1234 | md5sum)
and that can be written in C with popen()
:
#include <stdio.h>
/*
 * Run the shell pipeline "echo -n 1234 | md5sum" through popen() and print
 * whatever it wrote to standard output, enclosed in square brackets.
 * Returns 0 when some output was captured, 1 otherwise.
 */
int main(void)
{
    FILE *pipeline = popen("echo -n 1234 | md5sum", "r");
    if (pipeline == NULL)
    {
        fprintf(stderr, "failed to create pipeline\n");
        return 1;
    }

    /* A single fread() of up to sizeof(output) bytes; the data is not
     * NUL-terminated, which is why it is printed with an explicit length. */
    char output[64];
    size_t nbytes = fread(output, 1, sizeof(output), pipeline);
    pclose(pipeline);

    if (nbytes == 0)
    {
        printf("no output available!\n");
        return 1;
    }

    printf("output=[%.*s]\n", (int)nbytes, output);
    return 0;
}
The output from this is:
output=[4c35fa2227e11ba8c892cbbb5d46417c -
]
Note that this is equivalent to the output from the first of the following commands:
$ printf "%s %s\n" -n 1234 | md5sum
4c35fa2227e11ba8c892cbbb5d46417c -
$ printf "%s" 1234 | md5sum
81dc9bdb52d04dc20036dbd8313ed055 -
$
(Using echo -n
isn't entirely portable. There's room to think you were expecting the output from the second command.)
If you want two-way communication
However, the headline question in the title suggests you want your program to write data to the md5sum
command and read the response hash back again. That is certainly doable, but you have to work harder, and popen()
is no longer appropriate. I'm too lazy not to use my preferred error reporting functions, which are available on GitHub in my SOQ — Stack Overflow Questions repository as files stderr.c
and stderr.h
in the LibSOQ directory. It makes error checking so much simpler when you simply write a test and make a function call if the test indicates failure.
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/wait.h>
#include "stderr.h"
/*
 * Copy everything readable from file descriptor ifd to file descriptor ofd.
 *
 * Data is moved in chunks of up to 1024 bytes until read() reports end of
 * file.  A write() that transfers fewer bytes than were read, or a read()
 * that fails, is reported via err_syserr() (presumably fatal, as at the
 * other call sites in this file -- confirm against stderr.h).
 */
static void fd_copy(int ifd, int ofd)
{
    char buffer[1024];
    ssize_t rbytes;

    while ((rbytes = read(ifd, buffer, sizeof(buffer))) > 0)
    {
        ssize_t wbytes = write(ofd, buffer, (size_t)rbytes);
        if (wbytes != rbytes)
            err_syserr("short write %zd bytes instead of %zd expected: ", wbytes, rbytes);
    }

    /* The loop also ends on a read failure; without this check the caller
     * would go on to report a hash of truncated input as if it were valid. */
    if (rbytes < 0)
        err_syserr("read error on file descriptor %d: ", ifd);
}
/*
 * Child-side setup: wire the pipes to the standard streams and run md5sum.
 *
 * to_child  - pipe the parent writes to; its read end becomes standard input.
 * to_parent - pipe the parent reads from; its write end becomes standard output.
 *
 * After the two dup2() calls, all four original pipe descriptors are closed
 * so that the executed program holds only the standard-stream copies.  The
 * function is not expected to return: either execvp() replaces the process
 * image or the failure is reported through err_syserr().
 */
static void be_childish(int *to_child, int *to_parent)
{
    if (dup2(to_child[0], STDIN_FILENO) != STDIN_FILENO)
        err_syserr("failed to duplicate pipe to standard input: ");
    if (dup2(to_parent[1], STDOUT_FILENO) != STDOUT_FILENO)
        err_syserr("failed to duplicate pipe to standard output: ");

    /* Close both ends of both pipes -- each descriptor exactly once. */
    close(to_child[0]);
    close(to_child[1]);
    close(to_parent[0]);
    close(to_parent[1]);

    char *cmd[] = { "md5sum", NULL };
    execvp(cmd[0], cmd);
    /* Only reached when execvp() fails. */
    err_syserr("failed to execute command %s: ", cmd[0]);
    /*NOTREACHED*/
}
/*
 * Parent-side work: feed the contents of fname to the child and print the
 * hash the child sends back.
 *
 * fname     - path of the file to hash; opened read-only.
 * to_child  - pipe to the child; only the write end (index 1) is used here.
 * to_parent - pipe from the child; only the read end (index 0) is used here.
 *
 * Output has the form "<fname>: MD5 <hex digits>".  Failures to open the
 * file or to read a reply are reported through err_syserr().
 */
static void be_parental(const char *fname, int *to_child, int *to_parent)
{
    /* Close the pipe ends this process does not use. */
    close(to_child[0]);
    close(to_parent[1]);

    int fd = open(fname, O_RDONLY);
    if (fd < 0)
        err_syserr("failed to open file '%s' for reading: ", fname);
    fd_copy(fd, to_child[1]);
    close(fd);
    /* Closing the write end lets the child see end of file on its input. */
    close(to_child[1]);

    /* Leave room for a terminator: read() does not NUL-terminate, and
     * strspn() below requires a proper C string. */
    char buffer[128];
    ssize_t rbytes = read(to_parent[0], buffer, sizeof(buffer) - 1);
    close(to_parent[0]);
    if (rbytes <= 0)
        err_syserr("read error (or unexpected EOF) from hash process: ");
    buffer[rbytes] = '\0';

    /* Keep only the leading run of hexadecimal digits (the hash itself). */
    buffer[strspn(buffer, "0123456789abcdefABCDEF")] = '\0';
    printf("%s: MD5 %s\n", fname, buffer);
}
/*
 * Print the MD5 hash of the file fname by piping its contents through a
 * child process.
 *
 * Two pipes are created (parent-to-child and child-to-parent), the process
 * forks, and the child runs be_childish() while the parent runs
 * be_parental().  Once the parent has printed the result, the child is
 * reaped so that hashing many files does not accumulate zombie processes.
 * Failures to create the pipes or to fork are reported via err_syserr().
 */
static void md5hash(const char *fname)
{
    int to_child[2];
    int to_parent[2];

    if (pipe(to_child) != 0 || pipe(to_parent) != 0)
        err_syserr("failed to create 2 pipes: ");

    pid_t pid = fork();
    if (pid < 0)
        err_syserr("failed to fork: ");

    if (pid == 0)
        be_childish(to_child, to_parent);
    else
    {
        be_parental(fname, to_child, to_parent);
        /* Best-effort reap: the hash has already been printed, so a
         * failure here is deliberately not treated as fatal. */
        (void)waitpid(pid, NULL, 0);
    }
}
/*
 * Entry point: hash every file named on the command line, in order.
 * Registers the program name with the error-reporting functions first and
 * issues a usage message when no file arguments are supplied.
 */
int main(int argc, char **argv)
{
    err_setarg0(argv[0]);

    if (argc < 2)
        err_usage("file [...]");

    int argi = 1;
    while (argi < argc)
    {
        md5hash(argv[argi]);
        argi++;
    }

    return 0;
}
Example output (program was pop73
):
$ pop73 mm*
mm19.c: MD5 1d2207adec878f8f10d12e1ffb8bcc4b
mm23.c: MD5 c3948c5a80107fdbfac9ad755e7e6470
mm53.c: MD5 a0a24610400d900eb18408519678481e
mm59.c: MD5 1a5b1807c331dd1b5b6ce5f6ffed7c59
$ md5sum mm*
1d2207adec878f8f10d12e1ffb8bcc4b mm19.c
c3948c5a80107fdbfac9ad755e7e6470 mm23.c
a0a24610400d900eb18408519678481e mm53.c
1a5b1807c331dd1b5b6ce5f6ffed7c59 mm59.c
$
This confirms that the files were correctly copied to the child, and the output MD5 hashes match.
I like the be_childish()
and be_parental()
function names for programs involving child and parent processes. Using functions for discrete jobs makes the code simpler to understand and maintain in the long run. Note, too, how careful the code is to close file descriptors.
Rule of thumb: If you duplicate one end of a pipe to standard input or standard output (or standard error), you should close both ends of the pipe before continuing.
The be_childish()
function exemplifies that rule of thumb. Before continuing to execute the command, it closes all 4 descriptors of the 2 pipes it is passed because it has copied one descriptor from each pipe to one of the standard streams.
Note the corollary: if you don't duplicate a file descriptor to one of the standard channels, you don't usually close both file descriptors of the pipe immediately. You do normally close one end of the pipe — the end that the current process won't use. But the other is left open until you've finished processing data, whatever that means. The be_parental()
function exemplifies the corollary.