22

I am trying to load an ELF file compiled with "gcc -m32 test.c -o test.exe" on Linux in a 64 bit x86 environment. I am trying to load that 32bit file (test.exe) inside a user space ELF loader which has the following core logic (32bit ELF).

The problem is that calling into the returned start address results in a segmentation fault core dump. Here is the code:

void *image_load (char *elf_start, unsigned int size)
{
    Elf32_Ehdr      *hdr    = NULL;
    Elf32_Phdr      *phdr   = NULL;
    unsigned char   *start  = NULL;
    Elf32_Addr      taddr   = 0;
    Elf32_Addr      offset  = 0;
    int i = 0;
    unsigned char *exec = NULL;
    Elf32_Addr      estart = 0;

    hdr = (Elf32_Ehdr *) elf_start;

    if(!is_image_valid(hdr)) {
        printk("image_load:: invalid ELF image\n");
        return 0;
    }

    exec = (unsigned char *)mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);

    if(!exec) {
        printk("image_load:: error allocating memory\n");
        return 0;
    }

    // Start with clean memory.
    memset(exec,0x0,size);

    phdr = (Elf32_Phdr *)(elf_start + hdr->e_phoff);

    for(i=0; i < hdr->e_phnum; ++i) {
            if(phdr[i].p_type != PT_LOAD) {
                    continue;
            }
            if(phdr[i].p_filesz > phdr[i].p_memsz) {
                    printk("image_load:: p_filesz > p_memsz\n");
                    munmap(exec, size);
                    return 0;
            }
            if(!phdr[i].p_filesz) {
                    continue;
            }

            // p_filesz can be smaller than p_memsz,
            // the difference is zeroe'd out.
            start = (unsigned char *) (elf_start + phdr[i].p_offset);
            // taddr = phdr[i].p_vaddr + (Elf32_Addr)exec;
            if(!estart) {
                estart = phdr[i].p_paddr;
            }
            taddr = (Elf32_Addr)exec + offset + (phdr[i].p_paddr - estart);
            memmove((unsigned char *)taddr,
                    (unsigned char *)start,phdr[i].p_filesz);
            offset += (phdr[i].p_memsz + (phdr[i].p_paddr - estart));

            if(!(phdr[i].p_flags & PF_W)) {
                    // Read-only.
                    mprotect((unsigned char *) taddr, 
                              phdr[i].p_memsz,
                              PROT_READ);
            }

            if(phdr[i].p_flags & PF_X) {
                    // Executable.
                    mprotect((unsigned char *) taddr, 
                            phdr[i].p_memsz,
                            PROT_EXEC);
            }
    }

    return (void *)((hdr->e_entry - estart) + (Elf32_Addr)exec);

}/* image_load */

...
    int (*main)(int, char **)=image_load(...);
    main(argc,argv); // Crash...
...
jopasserat
  • 5,721
  • 4
  • 31
  • 50
Smokey
  • 281
  • 1
  • 2
  • 6
  • 1
    `printk` suggests a kernel module, while `mmap` & `mprotect` are user-space. If you are coding a user-space application, did you consider compiling it with `gcc -Wall -g` and debugging it with `gdb` ? And the start address of an ELF image is *not* the `main` routine (but some `_start` in some `crt0.o`) – Basile Starynkevitch Dec 17 '12 at 07:47

3 Answers3

31

Please provide full runnable code, including the ELF that you are trying to load. I have taken the time to amend your code as best I could, and it seems to work, at least for this simple code.

Note that the loader must also be compiled as 32 bit code, you can not load a 32 bit file into a 64 bit process. Furthermore since you are not loading the code in the original place, it must be relocatable. Finally, it must be a static binary because you are not loading any libraries.

Update: Your code expects the entry point of the loaded code to conform to the int (*main)(int, char **) prototype which is not the case in general (side note: main actually gets a third argument, the environment, too). Read about the startup state of ELF. If you manually create the stack layout described there, you must jump to the entry point, and that will never return. In case of a C program, you could dig out the address of main and that would match the prototype. However you are then skipping the initalization of the C library (remember, your code doesn't do library loading, so the loaded program must be statically linked) and that could be a problem.

I have extended the code with the needed bits to handle a simple C program by resolving libc references and invoking main.

loader.c:

#include <stdio.h>
#include <string.h>
#include <libelf.h>
#include <sys/mman.h>
#include <dlfcn.h>

void printk(const char* msg)
{
    fputs(msg, stderr);
}

int is_image_valid(Elf32_Ehdr *hdr)
{
    return 1;
}

void *resolve(const char* sym)
{
    static void *handle = NULL;
    if (handle == NULL) {
        handle = dlopen("libc.so", RTLD_NOW);
    }
    return dlsym(handle, sym);
}

void relocate(Elf32_Shdr* shdr, const Elf32_Sym* syms, const char* strings, const char* src, char* dst)
{
    Elf32_Rel* rel = (Elf32_Rel*)(src + shdr->sh_offset);
    int j;
    for(j = 0; j < shdr->sh_size / sizeof(Elf32_Rel); j += 1) {
        const char* sym = strings + syms[ELF32_R_SYM(rel[j].r_info)].st_name;
        switch(ELF32_R_TYPE(rel[j].r_info)) {
            case R_386_JMP_SLOT:
            case R_386_GLOB_DAT:
                *(Elf32_Word*)(dst + rel[j].r_offset) = (Elf32_Word)resolve(sym);
                break;
        }
    }
}

void* find_sym(const char* name, Elf32_Shdr* shdr, const char* strings, const char* src, char* dst)
{
    Elf32_Sym* syms = (Elf32_Sym*)(src + shdr->sh_offset);
    int i;
    for(i = 0; i < shdr->sh_size / sizeof(Elf32_Sym); i += 1) {
        if (strcmp(name, strings + syms[i].st_name) == 0) {
            return dst + syms[i].st_value;
        }
    }
    return NULL;
}

void *image_load (char *elf_start, unsigned int size)
{
    Elf32_Ehdr      *hdr     = NULL;
    Elf32_Phdr      *phdr    = NULL;
    Elf32_Shdr      *shdr    = NULL;
    Elf32_Sym       *syms    = NULL;
    char            *strings = NULL;
    char            *start   = NULL;
    char            *taddr   = NULL;
    void            *entry   = NULL;
    int i = 0;
    char *exec = NULL;

    hdr = (Elf32_Ehdr *) elf_start;

    if(!is_image_valid(hdr)) {
        printk("image_load:: invalid ELF image\n");
        return 0;
    }

    exec = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                      MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);

    if(!exec) {
        printk("image_load:: error allocating memory\n");
        return 0;
    }

    // Start with clean memory.
    memset(exec,0x0,size);

    phdr = (Elf32_Phdr *)(elf_start + hdr->e_phoff);

    for(i=0; i < hdr->e_phnum; ++i) {
            if(phdr[i].p_type != PT_LOAD) {
                    continue;
            }
            if(phdr[i].p_filesz > phdr[i].p_memsz) {
                    printk("image_load:: p_filesz > p_memsz\n");
                    munmap(exec, size);
                    return 0;
            }
            if(!phdr[i].p_filesz) {
                    continue;
            }

            // p_filesz can be smaller than p_memsz,
            // the difference is zeroe'd out.
            start = elf_start + phdr[i].p_offset;
            taddr = phdr[i].p_vaddr + exec;
            memmove(taddr,start,phdr[i].p_filesz);

            if(!(phdr[i].p_flags & PF_W)) {
                    // Read-only.
                    mprotect((unsigned char *) taddr,
                              phdr[i].p_memsz,
                              PROT_READ);
            }

            if(phdr[i].p_flags & PF_X) {
                    // Executable.
                    mprotect((unsigned char *) taddr,
                            phdr[i].p_memsz,
                            PROT_EXEC);
            }
    }

    shdr = (Elf32_Shdr *)(elf_start + hdr->e_shoff);

    for(i=0; i < hdr->e_shnum; ++i) {
        if (shdr[i].sh_type == SHT_DYNSYM) {
            syms = (Elf32_Sym*)(elf_start + shdr[i].sh_offset);
            strings = elf_start + shdr[shdr[i].sh_link].sh_offset;
            entry = find_sym("main", shdr + i, strings, elf_start, exec);
            break;
        }
    }

    for(i=0; i < hdr->e_shnum; ++i) {
        if (shdr[i].sh_type == SHT_REL) {
            relocate(shdr + i, syms, strings, elf_start, exec);
        }
    }

   return entry;

}/* image_load */

int main(int argc, char** argv, char** envp)
{
    int (*ptr)(int, char **, char**);
    static char buf[1048576];
    FILE* elf = fopen(argv[1], "rb");
    fread(buf, sizeof buf, 1, elf);
    ptr=image_load(buf, sizeof buf);
    return ptr(argc,argv,envp);
}

elf.c:

#include <stdio.h>

int main()
{
    fprintf(stdout, "Hello world! fprintf=%p, stdout=%p\n", fprintf, stdout);
    return 0;
}

test run:

$ gcc -m32 -g -Wall -ldl -o loader loader.c
$ gcc -m32 -pie -fPIE -o elf elf.c
$ ./loader elf
Hello world! fprintf=0xf7612420, stdout=0xf770e4c0
Jester
  • 56,577
  • 4
  • 81
  • 125
  • 1
    Thanks for your help. The problem appears to be when you try to load a C program (even a simple one that prints hello world). My guess is that either there is a problem calling the C _start routine directly, or that the loader code has a bug relocating more than one section (around where the call to memmove is made). – Smokey Dec 18 '12 at 02:24
  • sorry for this, but I just wanted to know where you guys get the libelf libraries. Thank you – Kimutai Jul 15 '14 at 07:54
  • 1
    this is a very old thread, but, do you not need to worry about things being page aligned in your code? – user1018513 Oct 21 '14 at 17:04
  • @user1018513 here we are loading a proper ELF binary created by the normal toolchain, so all the alignment is already taken care of. – Jester Oct 21 '14 at 17:36
  • This isn't a very efficient way of doing things. That buffer is just for reading in the whole file. You can simply [stat](https://man7.org/linux/man-pages/man2/stat.2.html) it to get the size. – Jester May 04 '21 at 12:00
  • Sorry, I meant the size of the `mmap`. Would it be the value of adding all `p_memsz` together? Also, if I'm just trying to load in my own shared library that does not link to anything else and no `libc` do I need to do relocation? – Edward Chamberlain May 04 '21 at 22:46
6

exec = (unsigned char *)mmap(NULL, size, ...

This attempts to load the executable at arbitrary address. A non-PIE executable can only be loaded at the address it was linked at (usually 0x08048000 on Linux/ix86).

The problem appears to be when you try to load a C program (even a simple one that prints hello world).

If that program was dynamically linked, it is anything but simple, and your loader has a heck of a lot more to do: loading and relocating dependent shared libraries, fixing up GOT and TLS, etc. etc.

Employed Russian
  • 199,314
  • 34
  • 295
  • 362
2

use

exec = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                  MAP_PRIVATE | MAP_ANONYMOUS, hdr, 0);

instead of

exec = mmap(NULL, size, PROT_READ | PROT_WRITE | PROT_EXEC,
                  MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
j0k
  • 22,600
  • 28
  • 79
  • 90
Asad
  • 21
  • 1