The original argv
is normally handled as a single contiguous block of char *
values, followed immediately by another block of char *
values for the environment (the envp
in the int main(int argc, char **argv, char **envp)
variant of main()
, also pointed to by environ
). These are then followed by the argument strings and environment strings themselves.
The argument list and environment are probably not created by malloc()
per se — the arguments and environment are set up by the execve()
system call.
At one point three years ago, I was playing with 'find argv[0]
from a function other than main' and wrote the code shown below. It still works on Mac OS X Mavericks (10.9.4 — the original tested version was Snow Leopard 10.6) and Ubuntu 14.04. (There are better, though platform-specific, ways to get argv[0]
from a function — that's a separate SO question — so I would not use this technique in practice, even though it does work on some common platforms.)
#include "posixver.h"
#include <inttypes.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h> /* putenv(), setenv() */
extern char **environ; /* Should be declared in <unistd.h> */
/*
** The object of the exercise is: given just environ (since that is all
** that is available to a library function) attempt to find argv[0] (and
** hence argc).
**
** On some platforms, the layout of memory is such that the number of
** arguments (argc) is available, followed by the argument vector,
** followed by the environment vector.
**
**          argv                              environ
**           |                                 |
**           v                                 v
** | argc | argv0 | argv1 | ... | argvN | 0 | env0 | env1 | ... | envN | 0 |
**
** This applies to:
** -- Solaris 10 (32-bit, 64-bit SPARC)
** -- MacOS X 10.6 (Snow Leopard, 32-bit and 64-bit)
** -- Linux (RHEL 5 on x86/64, 32-bit and 64-bit)
**
** Sadly, this is not quite what happens on the other two Unix
** platforms. The value preceding argv0 seems to be a 0.
** -- AIX 6.1 (32-bit, 64-bit)
** -- HP-UX 11.23 IA64 (32-bit, 64-bit)
** Sub-standard POSIX support (no setenv()) and C99 support (no %zd).
**
** NB: If putenv() or setenv() is called to add an environment variable,
** then the base address of environ changes radically, moving off the
** stack onto heap, and all bets are off. Modifying an existing
** variable is not a problem.
**
** Spotting the change from stack to heap is done by checking whether
** the address held in environ is more than 128 K pointer widths (that
** is, 128 K times the size of a pointer) away from a local variable.
**
** This code is nominally incredibly machine-specific - but actually
** works remarkably portably.
*/
/*
** Result of the search performed by find_argv0(): an argument vector and
** the number of arguments in it.
*/
typedef struct Arguments
{
/* First slot of the vector; slot argc is also read (and printed) by the callers. */
char **argv;
/* Number of arguments found, or 1 when the placeholder vector is returned. */
size_t argc;
} Arguments;
/*
** Print one slot of a char * vector on stdout in the form
**   tag[i] = 0x<address of slot> (0x<pointer stored in slot>) (<string>)
** where <string> is the text the slot points to, or "<null>" when the
** slot holds a null pointer.
*/
static void print_cpp(const char *tag, int i, char **ptr)
{
    char *stored = *ptr;
    const char *text = "<null>";

    if (stored != 0)
        text = stored;

    printf("%s[%d] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
           tag, i, (uintptr_t)ptr, (uintptr_t)stored, text);
}
/*
** Threshold, in bytes, used by find_argv0(): 128 K times the size of a
** pointer.  If environ is at least this far from a local variable, the
** backwards search for argv is not attempted.
*/
enum { MAX_DELTA = sizeof(void *) * 128 * 1024 };
/*
** Try to recover argc and argv using nothing but environ.
**
** If environ lies within MAX_DELTA bytes of a local variable, the slots
** below it are walked backwards, starting at environ[-2] (environ[-1] is
** expected to be argv's terminating null).  The walk stops at the first
** slot whose contents, read as an integer, equal the number of argument
** slots already passed (i.e. argc) or are zero (the layout seen on AIX
** and HP-UX).  Everything between that slot and environ is reported as
** the argument vector.
**
** Otherwise the placeholder vector { "<unknown>", 0 } is returned with
** argc set to 1.
**
** Side effect: diagnostic lines ("test[...]", "argc = ...", "argv[...]")
** are written to stdout.
*/
static Arguments find_argv0(void)
{
    static char *dummy[] = { "<unknown>", 0 };
    Arguments args;
    uintptr_t i;
    /* Only the address of this local matters; no arithmetic on environ is needed to obtain it. */
    char **probe = 0;
    uintptr_t here = (uintptr_t)&probe;
    uintptr_t there = (uintptr_t)environ;
    uintptr_t delta = (here > there) ? here - there : there - here;

    if (delta < MAX_DELTA)
    {
        /*
        ** The slot being tested may hold a small count rather than a
        ** valid pointer, so convert it to an integer before adding to
        ** it instead of doing char * arithmetic on it.
        */
        for (i = 2; (uintptr_t)*(environ - i) + 2 != i && *(environ - i) != 0; i++)
            print_cpp("test", (int)i, environ - i);
        args.argc = i - 2;
        args.argv = environ - i + 1;
    }
    else
    {
        args.argc = 1;
        args.argv = dummy;
    }

    printf("argc = %zu\n", args.argc);
    /* <= so that the terminating null slot is shown as well. */
    for (i = 0; i <= args.argc; i++)
        print_cpp("argv", (int)i, &args.argv[i]);
    return args;
}
/*
** Obtain the argument vector from find_argv0() and print it on stdout,
** one entry per line.  Slot argc is printed too; a null slot is shown as
** "<null>".
*/
static void print_arguments(void)
{
    Arguments args = find_argv0();

    printf("Command name and arguments\n");
    printf("argc = %zu\n", args.argc);      /* %zu: argc is a size_t */
    for (size_t i = 0; i <= args.argc; i++)
    {
        const char *text = args.argv[i] ? args.argv[i] : "<null>";

        printf("argv[%zu] = %s\n", i, text);
    }
}
/*
** Report where argv and environ are and whether environ is still within
** argc + 1 slots of argv.
**
** Prints both addresses and their distance in pointer-sized slots, then
** every argv slot from 0 through argc ("chkv").  If the distance exceeds
** argc + 1, returns 0 without reading anything below environ.  Otherwise
** prints the argc + 1 slots below environ in reverse order ("chkr"), the
** two slots below those ("chkF"), and returns 1.
*/
static int check_environ(int argc, char **argv)
{
    size_t n = (size_t)argc;
    size_t i;
    unsigned long delta = (unsigned long)((argv > environ) ? argv - environ : environ - argv);

    /* Pointers are printed through uintptr_t/PRIXPTR so they are not narrowed where long is smaller than a pointer. */
    printf("environ = 0x%" PRIXPTR "; argv = 0x%" PRIXPTR " (delta: 0x%lX)\n",
           (uintptr_t)environ, (uintptr_t)argv, delta);
    for (i = 0; i <= n; i++)
        print_cpp("chkv", (int)i, &argv[i]);

    if (delta > (unsigned long)argc + 1)
        return 0;

    for (i = 1; i < n + 2; i++)
    {
        char **slot = environ - i;

        printf("chkr[%zu] = 0x%" PRIXPTR " (0x%" PRIXPTR ") (%s)\n",
               i, (uintptr_t)slot, (uintptr_t)*slot, (*slot ? *slot : "<null>"));
        /* NOTE(review): presumably flushed per line so output is not lost if a later slot read faults. */
        fflush(0);
    }

    /* The two slots below argv[0]; printed as raw values only, never as strings. */
    for (i = n + 2; i <= n + 3; i++)
    {
        char **slot = environ - i;

        printf("chkF[%zu] = 0x%" PRIXPTR " (0x%" PRIXPTR ")\n",
               i, (uintptr_t)slot, (uintptr_t)*slot);
    }
    return 1;
}
/*
** Demonstration driver.  Runs check_environ() and print_arguments() three
** times: on the environment as received, after setenv() on SHELL, and
** after putenv() of a new variable.  Returns 0 on success, or
** EXIT_FAILURE if either environment call reports an error.
*/
int main(int argc, char **argv)
{
    /* putenv() keeps the pointer it is given, so pass writable static storage rather than a string literal. */
    static char new_variable[] = "CODSWALLOP=nonsense";

    printf("Before setting environment\n");
    if (check_environ(argc, argv))
        print_arguments();

    /* NOTE(review): SHELL is presumably already set, making this a modification rather than an addition. */
    if (setenv("SHELL", "/bin/csh", 1) != 0)
    {
        perror("setenv");
        return EXIT_FAILURE;
    }
    printf("After modifying environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Modifying environment messed everything up\n");
    print_arguments();

    if (putenv(new_variable) != 0)
    {
        perror("putenv");
        return EXIT_FAILURE;
    }
    printf("After adding to environment\n");
    if (check_environ(argc, argv) == 0)
        printf("Adding environment messed everything up\n");
    print_arguments();

    return 0;
}
Example output from Mac OS X:
Before setting environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After modifying environment
environ = 0x7FFF584D04C8; argv = 0x7FFF584D0498 (delta: 0x6)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[1] = 0x7FFF584D04C0 (0x0) (<null>)
chkr[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkr[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkr[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkr[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkr[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkF[7] = 0x7FFF584D0490 (0x5)
chkF[8] = 0x7FFF584D0488 (0x0)
test[2] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
test[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
test[4] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
test[5] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
test[6] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argc = 5
argv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
argv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
argv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
argv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
argv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
argv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Command name and arguments
argc = 5
argv[0] = ./find_argv0
argv[1] = macedonian
argv[2] = obelisk
argv[3] = mental breakdown
argv[4] = testing: 1, 2, 3
argv[5] = <null>
After adding to environment
environ = 0x7FB1EA403B60; argv = 0x7FFF584D0498 (delta: 0x9ADC19927)
chkv[0] = 0x7FFF584D0498 (0x7FFF584D06B0) (./find_argv0)
chkv[1] = 0x7FFF584D04A0 (0x7FFF584D06BD) (macedonian)
chkv[2] = 0x7FFF584D04A8 (0x7FFF584D06C8) (obelisk)
chkv[3] = 0x7FFF584D04B0 (0x7FFF584D06D0) (mental breakdown)
chkv[4] = 0x7FFF584D04B8 (0x7FFF584D06E1) (testing: 1, 2, 3)
chkv[5] = 0x7FFF584D04C0 (0x0) (<null>)
Adding environment messed everything up
argc = 1
argv[0] = 0x107730040 (0x10772FEC0) (<unknown>)
argv[1] = 0x107730048 (0x0) (<null>)
Command name and arguments
argc = 1
argv[0] = <unknown>
argv[1] = <null>