4

My code works well with malloc, but not with mmap. The code is below:

main.c

#include <stdio.h>
#include <stdlib.h>

/*
 * Test program: performs a single malloc() so that a preloaded malloc hook
 * has one call to report, prints the stored value, and releases the memory.
 */
int main(void){
  /* Keep malloc() here (not calloc()): malloc is the call being hooked. */
  int *p = malloc(sizeof *p);
  if (p == NULL) {
    perror("malloc");
    return EXIT_FAILURE;
  }
  /* malloc() returns uninitialised storage; give it a defined value before
     reading it, otherwise the printf below reads an indeterminate int. */
  *p = 0;
  printf("in main(): value p = %d\n", *p);
  free(p);
  return 0;
}

preload.c

#define _GNU_SOURCE
#include <time.h>
#include <dlfcn.h>
#include <stdio.h>
#include <sys/types.h>

/* Next malloc in symbol lookup order; assigned by _init() via dlsym(). */
void *(*orig_malloc)(size_t size);

/*
 * Interposed malloc(): logs the requested size and forwards the request to
 * orig_malloc.
 *
 * Returns whatever orig_malloc returns, or NULL when orig_malloc has not been
 * resolved yet (calling through a null function pointer would crash).
 */
void *malloc(size_t size){
  /* Per-thread flag: set while this thread is inside the printf below.
     stdio may call malloc() itself (e.g. to allocate a stream buffer); without
     the flag such a nested call would log again and recurse without bound. */
  static __thread int logging;
  void *(*next)(size_t) = orig_malloc;

  if (next == NULL) {
    return NULL;
  }
  if (!logging) {
    logging = 1;
    /* %zu is the conversion for size_t; %lu only matches where size_t
       happens to be unsigned long. */
    printf("  Hooked(preload)! malloc:size:%zu\n", size);
    logging = 0;
  }
  return next(size);
}

/* Next mmap in symbol lookup order; assigned by _init() via dlsym(). */
void * (*orig_mmap)(void *start, size_t length, int prot, int flags, int fd, off_t offset);

/*
 * Interposed mmap(): logs its arguments and forwards the call to orig_mmap.
 *
 * Returns whatever orig_mmap returns, or (void *) -1 (the value glibc's
 * <sys/mman.h> uses for MAP_FAILED) when orig_mmap has not been resolved yet.
 */
void * mmap(void *start, size_t length, int prot, int flags, int fd, off_t offset){
  if (orig_mmap == NULL) {
    return (void *) -1;
  }
  /* Every conversion must match its argument type: %zu for size_t, and the
     int/off_t arguments are converted explicitly instead of being passed to
     %p/%d. flags and fd stay in hexadecimal so the log keeps the shape the
     old %p conversions produced (e.g. flags:0x22, fd:0xffffffff). */
  printf("  Hooked(preload)! mmap:start:%p, length:%zu, prot:%d, flags:%#x, fd:%#x, offset:%lld\n",
         start, length, prot, (unsigned int) flags, (unsigned int) fd, (long long) offset);
  return orig_mmap(start, length, prot, flags, fd, offset);
}

/*
 * Library initialiser: looks up the next definitions of malloc and mmap with
 * dlsym(RTLD_NEXT, ...) and stores them in orig_malloc / orig_mmap so the
 * hooks can forward to them.
 */
void
_init(void)
{
  /* Resolve the forwarding pointers before producing any output: printf may
     allocate, and the malloc hook calls through orig_malloc. */
  orig_malloc = (void* (*)(size_t)) dlsym(RTLD_NEXT, "malloc");
  orig_mmap = (void* (*)(void*, size_t, int, int, int, off_t)) dlsym(RTLD_NEXT, "mmap");

  printf("Loading hack.\n");

  /* dlsym() returns NULL when no further definition exists; report it
     instead of leaving the hooks with a null target unnoticed. */
  if (orig_malloc == NULL) {
    fprintf(stderr, "preload: dlsym(RTLD_NEXT, \"malloc\") failed\n");
  }
  if (orig_mmap == NULL) {
    fprintf(stderr, "preload: dlsym(RTLD_NEXT, \"mmap\") failed\n");
  }
}

to compile it

gcc -Wall -fPIC -DPIC -c preload.c
ld -shared -o preload.so preload.o -ldl
gcc main.c

to run it with LD_PRELOAD

LD_PRELOAD=./preload.so ./a.out

to run it with strace

strace ./a.out 2>&1 | view -

With LD_PRELOAD, the output shows hook messages only for malloc; no call to mmap is intercepted. Meanwhile, when running with strace, the output does show that mmap is called multiple times.

This result baffles me; assuming mmap is indeed called by main.c (I guess through malloc), how come preload.c can not intercept mmap?

PS: My platform is Ubuntu 14.04 with Linux kernel 3.13

PS2: By syscall, I mean the syscall wrapper in libc (not sure if this makes a difference to the question though)..

Joshua
  • 40,822
  • 8
  • 72
  • 132
Richard
  • 14,642
  • 18
  • 56
  • 77

4 Answers4

7

The mmap calls printed by strace are glibc-internal. It's impossible to intercept the glibc-internal calls to mmap with LD_PRELOAD:

mmap is not in the .plt-section of /lib64/libc.so.6 but is called directly from glibc and therefore LD_PRELOAD can't intercept glibc's calls to mmap.

$ objdump -j .plt -d /lib64/libc.so.6 

/lib64/libc.so.6:     file format elf64-x86-64


Disassembly of section .plt:

000000000001f400 <*ABS*+0x8e3fb@plt-0x10>:
   1f400:   ff 35 02 ac 39 00       pushq  0x39ac02(%rip)        # 3ba008 <_GLOBAL_OFFSET_TABLE_+0x8>
   1f406:   ff 25 04 ac 39 00       jmpq   *0x39ac04(%rip)        # 3ba010 <_GLOBAL_OFFSET_TABLE_+0x10>
   1f40c:   0f 1f 40 00             nopl   0x0(%rax)

000000000001f410 <*ABS*+0x8e3fb@plt>:
   1f410:   ff 25 02 ac 39 00       jmpq   *0x39ac02(%rip)        # 3ba018 <_GLOBAL_OFFSET_TABLE_+0x18>
   1f416:   68 0b 00 00 00          pushq  $0xb
   1f41b:   e9 e0 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f420 <*ABS*+0xb8c10@plt>:
   1f420:   ff 25 fa ab 39 00       jmpq   *0x39abfa(%rip)        # 3ba020 <_GLOBAL_OFFSET_TABLE_+0x20>
   1f426:   68 0a 00 00 00          pushq  $0xa
   1f42b:   e9 d0 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f430 <realloc@plt>:
   1f430:   ff 25 f2 ab 39 00       jmpq   *0x39abf2(%rip)        # 3ba028 <_GLOBAL_OFFSET_TABLE_+0x28>
   1f436:   68 00 00 00 00          pushq  $0x0
   1f43b:   e9 c0 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f440 <malloc@plt>:
   1f440:   ff 25 ea ab 39 00       jmpq   *0x39abea(%rip)        # 3ba030 <_GLOBAL_OFFSET_TABLE_+0x30>
   1f446:   68 01 00 00 00          pushq  $0x1
   1f44b:   e9 b0 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f450 <__tls_get_addr@plt>:
   1f450:   ff 25 e2 ab 39 00       jmpq   *0x39abe2(%rip)        # 3ba038 <_GLOBAL_OFFSET_TABLE_+0x38>
   1f456:   68 02 00 00 00          pushq  $0x2
   1f45b:   e9 a0 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f460 <memalign@plt>:
   1f460:   ff 25 da ab 39 00       jmpq   *0x39abda(%rip)        # 3ba040 <_GLOBAL_OFFSET_TABLE_+0x40>
   1f466:   68 03 00 00 00          pushq  $0x3
   1f46b:   e9 90 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f470 <*ABS*+0x90f60@plt>:
   1f470:   ff 25 d2 ab 39 00       jmpq   *0x39abd2(%rip)        # 3ba048 <_GLOBAL_OFFSET_TABLE_+0x48>
   1f476:   68 09 00 00 00          pushq  $0x9
   1f47b:   e9 80 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f480 <_dl_find_dso_for_object@plt>:
   1f480:   ff 25 ca ab 39 00       jmpq   *0x39abca(%rip)        # 3ba050 <_GLOBAL_OFFSET_TABLE_+0x50>
   1f486:   68 04 00 00 00          pushq  $0x4
   1f48b:   e9 70 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f490 <calloc@plt>:
   1f490:   ff 25 c2 ab 39 00       jmpq   *0x39abc2(%rip)        # 3ba058 <_GLOBAL_OFFSET_TABLE_+0x58>
   1f496:   68 05 00 00 00          pushq  $0x5
   1f49b:   e9 60 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f4a0 <free@plt>:
   1f4a0:   ff 25 ba ab 39 00       jmpq   *0x39abba(%rip)        # 3ba060 <_GLOBAL_OFFSET_TABLE_+0x60>
   1f4a6:   68 06 00 00 00          pushq  $0x6
   1f4ab:   e9 50 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f4b0 <*ABS*+0xb8bc0@plt>:
   1f4b0:   ff 25 b2 ab 39 00       jmpq   *0x39abb2(%rip)        # 3ba068 <_GLOBAL_OFFSET_TABLE_+0x68>
   1f4b6:   68 08 00 00 00          pushq  $0x8
   1f4bb:   e9 40 ff ff ff          jmpq   1f400 <data.8467+0x1f390>

000000000001f4c0 <*ABS*+0x8ec70@plt>:
   1f4c0:   ff 25 aa ab 39 00       jmpq   *0x39abaa(%rip)        # 3ba070 <_GLOBAL_OFFSET_TABLE_+0x70>
   1f4c6:   68 07 00 00 00          pushq  $0x7
   1f4cb:   e9 30 ff ff ff          jmpq   1f400 <data.8467+0x1f390>
[m@localhost ~]$ 

Calls to mmap inside glibc do not go through a .plt entry but call the function directly, so it is impossible to intercept these calls:

$ objdump -d /lib64/libc.so.6 | grep mmap
[...]
   81628:   e8 83 ad 07 00          callq  fc3b0 <mmap>
   8177c:   e8 2f ac 07 00          callq  fc3b0 <mmap>
00000000000fc3b0 <mmap>:
   fc3c0:   73 01                   jae    fc3c3 <mmap+0x13>
  13a267:   e8 44 21 fc ff          callq  fc3b0 <mmap>
$ 

00000000000fc3b0 <mmap>:
   fc3b0:   49 89 ca                mov    %rcx,%r10
   fc3b3:   b8 09 00 00 00          mov    $0x9,%eax
   fc3b8:   0f 05                   syscall 
   fc3ba:   48 3d 01 f0 ff ff       cmp    $0xfffffffffffff001,%rax
   fc3c0:   73 01                   jae    fc3c3 <mmap+0x13>
   fc3c2:   c3                      retq   
   fc3c3:   48 8b 0d 96 da 2b 00    mov    0x2bda96(%rip),%rcx        # 3b9e60 <_DYNAMIC+0x2e0>
   fc3ca:   f7 d8                   neg    %eax
   fc3cc:   64 89 01                mov    %eax,%fs:(%rcx)
   fc3cf:   48 83 c8 ff             or     $0xffffffffffffffff,%rax
   fc3d3:   c3                      retq   
   fc3d4:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
   fc3db:   00 00 00 
   fc3de:   66 90                   xchg   %ax,%ax
4566976
  • 2,419
  • 1
  • 10
  • 14
  • Sorry, but sure the libc `mmap()` can be hooked that way! Just use the OP's code and a tiny test program calling `mmap()` ... result is here: "Hooked(preload)! mmap:start:(nil), length:20, prot:3, flags:0x22, fd:0xffffffff, offset:0" –  Jul 15 '15 at 19:34
  • Yes, but not glibc's calls to its `mmap` @FelixPalmen – 4566976 Jul 15 '15 at 19:37
  • so what lib defines it? It's definitely intercepted and the `LD_PRELOAD` "trick" can only intercept calls to libraries linked by `ld.so` [**edit** I think you refer to library-internal calls ... right?] –  Jul 15 '15 at 19:43
  • @FelixPalmen "I think you refer to library-internal calls ... right?" Yes. – 4566976 Jul 15 '15 at 19:48
  • capturing glibc-internal calls `mmap` is impossible with `LD_PRELOAD`, but they are captured by `strace`.. wondering how that is done? – Richard Jul 15 '15 at 21:37
  • @Richard [Here](http://stackoverflow.com/a/6672788) I found an answer how strace works. – 4566976 Jul 16 '15 at 15:57
  • thx.. though, strace is based on ptrace, which will switch into kernel space at runtime... which is something I want to avoid... – Richard Jul 17 '15 at 16:52
6

mmap is a syscall, malloc is not.

Since syscalls are essential for the functioning of a program, they must work before ld.so actually springs into action, so they reside in a section that gets loaded before everything else; it may be linked dynamically, but that mapping (of that particular "virtual" dynamic object) is done by the kernel itself, long before ld.so actually gets to work.

datenwolf
  • 159,371
  • 13
  • 185
  • 298
  • What I mean by syscall is really syscall wrapper in libc.. in this sense, can `LD_PRELOAD` capture syscall wrapper? (I suppose syscall wrapper is called after ld.so) – Richard Jul 15 '15 at 19:13
  • 2
    @Richard: No, the syscall wrapper in libc is bound to the PLT stub very early. So early that LD_PRELOAD is not able to take effect. – datenwolf Jul 15 '15 at 19:16
  • 1
    @Richard: See this article on how the whole PLT/GOT thing works out: https://www.technovelty.org/linux/plt-and-got-the-key-to-code-sharing-and-dynamic-libraries.html – datenwolf Jul 15 '15 at 19:18
  • What you are wrapping here is not the tiny syscall wrapper automatically linked (which isn't even present in an actual file), but the `mmap()` function in your libc. –  Jul 15 '15 at 19:23
  • "No, the syscall wrapper in libc is bound to the PLT stub very early" - this is wrong. Intra-libc mmap calls are not done through PLT and that's why they can't be intercepted. On the contrary, mmap calls in other libraries and in executable are done through PLT and can be interposed just as any other API. "dynamic linking is executed by the kernel itself" - wrong too, kernel does not do _any_ dynamic linking itself. – yugr Mar 03 '17 at 11:24
  • @yugr: I was not talking about dynamic linking of arbitrary libraries, but refering to the linkage of that special piece of code that implements syscall entry/exit: http://man7.org/linux/man-pages/man7/vdso.7.html notably this part: *The "vDSO" (virtual dynamic shared object) is a small shared library that __the kernel automatically maps__ into the address space of all user-space applications.* An in-depth explanation of what it does and how it works can be found here: http://www.trilithium.com/johan/2005/08/linux-gate/ – datenwolf Mar 03 '17 at 12:02
  • Well, kernel maps vDSO (it also maps executable and `ld.so`) but does not do any linkage for it. _All_ dynamic linking, even for vDSO, is done in `ld.so`, not in kernel. – yugr Mar 03 '17 at 12:14
  • @yugr: Eh, yes. I think I meant mapping when I wrote it (corrected it). And yes, it's `ld.so` (or whatever is specified as interpreter) that does the actual linking, which is why I mentioned that, too. Hmm, I think deleting that answer would be the best course of action, but SO won't allow it. Maybe suggest a few edits? – datenwolf Mar 03 '17 at 12:22
  • @traducerad: You'd have to work some [`ptrace(2)`](https://linux.die.net/man/2/ptrace) magic for that: Attach ptrace to process (set ptrace flags to execve the target process in a stopped state, then attach), with `PTRACE_SYSCALL` and everytime the process stops, manipulate the call stack to modify the syscall. Yes, I know, that's not pretty straightforward, but the intended way to do this kind of stuff. – datenwolf Oct 08 '19 at 14:48
3

The title to your question is actually the answer.

assuming mmap is indeed called by main.c (I guess through malloc)

So your main.c doesn't call the library function mmap()? Of course you can't intercept syscalls this way, how would you do it? Some architectures have a syscall CPU instruction, some use a special interrupt ... there are a lot of ways, but it's in any case completely different from C calling conventions. The kernel is not somehow linked to your binary but takes control (with some hardware assistance) when your userspace process does something ... "special".

If you want to know how to intercept syscalls, this is of course very platform specific, but I would advise you to just take a look at the source of the strace utility. You will never see a malloc() in strace, because this is not a syscall; malloc() uses the mmap syscall.

On the other hand, if you preload your lib to a binary that actually calls the libc mmap() function, it will work as expected.

In a nutshell: libc mmap() is a user-friendly wrapper around the mmap syscall and with the following main:

#include <sys/mman.h>

/*
 * Minimal program that calls the libc mmap() wrapper directly, so a preloaded
 * mmap hook has a call to intercept. Returns 1 if the mapping fails.
 */
int main(void)
{
    void *test = mmap(0, 20, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
        -1, 0);

    /* mmap() reports failure with MAP_FAILED, not NULL. */
    if (test == MAP_FAILED)
        return 1;

    /* Release the mapping; the length matches the one requested above. */
    munmap(test, 20);
    return 0;
}

The result is:

Loading hack.
Hooked(preload)! mmap:start:(nil), length:20, prot:3, flags:0x22,
fd:0xffffffff, offset:0

  • I am targeting on `x86_64` only; seems the `syscall` instruction scheme is used. wondering if syscall interception can be done by simply hooking with all `syscall` instructions in a binary... – Richard Jul 15 '15 at 21:41
  • I don't think so ... `syscall` is on the one hand a CPU instruction (you can't hook those) and on the other hand a minimal wrapper in `glibc` allowing programs to call any syscall using C calling conventions. The latter can be hooked using LD_PRELOAD. –  Jul 15 '15 at 21:44
  • I believe it is doable by binary rewriting technique though – Richard Jul 16 '15 at 18:33
  • Well yes, ideally if `syscall` instructions can be replaced with an instruction of the same length that just calls your own subroutine ... but I guess it's a one-word instruction taking no data? Ah well, I never learned the x86 instruction set, so I'm out ;) –  Jul 16 '15 at 20:42
  • I'm not sure how this answers the question. The author clearly specified that he's interested in syscall wrapper, not syscall itself. – yugr Mar 03 '17 at 11:27
  • @yugr the "PS" in the question was added after my answer, but it doesn't even matter; when libc calls its own functions, there's no dynamic symbol involved. –  Mar 08 '17 at 07:56
  • I think this answer is out of date. The title doesn't match what the answer claims. And the answer recommends to use strace, but in the question the user explained that he already did use strace. – Samuel Mar 03 '21 at 00:11
0

The syscall_intercept library does some binary rewriting tricks to intercept syscalls coming from libc. You can LD_PRELOAD your interception logic written using this library and it will intercept the syscalls; you can choose to handle them and/or pass them on to the kernel.

gohar94
  • 138
  • 1
  • 3
  • 11