0

Consider the code snippet below.

The entry point of the program is main as defined in C-source code. Now, normally a function starts by decreasing %rsp to reserve space for local variables. But here, the GCC compiler reserves this space in some of the added (initial) functions.

My question is: where in these GCC-specific initialization functions do I look to find the number of bytes reserved for local variables? In this case, the number of reserved bytes is 0x08.

Also, in what order are these initial functions called?

# _start: prepares registers and the stack, then calls __libc_start_main
# through the PLT. The immediates loaded into rdi, rcx and r8 are the
# addresses of main (0x4004c0), __libc_csu_init (0x400530) and
# __libc_csu_fini (0x4005a0) in this listing. What __libc_start_main does
# with them is not visible here; presumably it runs the init function
# before main -- confirm against the glibc documentation.
00000000004003c0 <_start>:
  4003c0:   31 ed                   xor    ebp,ebp
  # r9 (6th integer argument register) = rdx as it was on entry
  4003c2:   49 89 d1                mov    r9,rdx
  # rsi = value on top of the initial stack (presumably argc -- confirm)
  4003c5:   5e                      pop    rsi
  # rdx = stack pointer after the pop (presumably argv -- confirm)
  4003c6:   48 89 e2                mov    rdx,rsp
  # round rsp down to a 16-byte boundary
  4003c9:   48 83 e4 f0             and    rsp,0xfffffffffffffff0
  # two 8-byte pushes keep rsp 16-byte aligned at the call below
  4003cd:   50                      push   rax
  4003ce:   54                      push   rsp
  # r8 = 0x4005a0 (__libc_csu_fini), rcx = 0x400530 (__libc_csu_init),
  # rdi = 0x4004c0 (main)
  4003cf:   49 c7 c0 a0 05 40 00    mov    r8,0x4005a0
  4003d6:   48 c7 c1 30 05 40 00    mov    rcx,0x400530
  4003dd:   48 c7 c7 c0 04 40 00    mov    rdi,0x4004c0
  4003e4:   e8 b7 ff ff ff          call   4003a0 <__libc_start_main@plt>
  # only reached if the call above returns
  4003e9:   f4                      hlt    
  # padding up to the next function at 0x4003f0
  4003ea:   66 0f 1f 44 00 00       nop    WORD PTR [rax+rax*1+0x0]

# deregister_tm_clones: with the constants in this build the function is a
# no-op. 0x601037 - 0x601030 = 7, which is <= 0xe, so the jbe at 400403 is
# taken and control goes straight to the pop/ret at 400420. It touches rsp
# only through the balanced push/pop of rbp.
00000000004003f0 <deregister_tm_clones>:
  4003f0:   b8 37 10 60 00          mov    eax,0x601037
  4003f5:   55                      push   rbp
  4003f6:   48 2d 30 10 60 00       sub    rax,0x601030
  4003fc:   48 83 f8 0e             cmp    rax,0xe
  # mov does not modify flags, so the jbe still tests the cmp above
  400400:   48 89 e5                mov    rbp,rsp
  400403:   76 1b                   jbe    400420 <deregister_tm_clones+0x30>
  # function pointer is the constant 0 here, so this path also returns
  400405:   b8 00 00 00 00          mov    eax,0x0
  40040a:   48 85 c0                test   rax,rax
  40040d:   74 11                   je     400420 <deregister_tm_clones+0x30>
  # tail jump: restore rbp, pass 0x601030 in edi, jump instead of call
  40040f:   5d                      pop    rbp
  400410:   bf 30 10 60 00          mov    edi,0x601030
  400415:   ff e0                   jmp    rax
  # padding
  400417:   66 0f 1f 84 00 00 00    nop    WORD PTR [rax+rax*1+0x0]
  40041e:   00 00 
  # common exit
  400420:   5d                      pop    rbp
  400421:   c3                      ret    
  # padding up to the next function at 0x400430
  400422:   0f 1f 40 00             nop    DWORD PTR [rax+0x0]
  400426:   66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40042d:   00 00 00 

# register_tm_clones: with the constants in this build the function is a
# no-op. rsi = 0x601030 - 0x601030 = 0, the shifts leave it 0, and the je
# at 400451 goes straight to the pop/ret at 400468. It touches rsp only
# through the balanced push/pop of rbp.
0000000000400430 <register_tm_clones>:
  400430:   be 30 10 60 00          mov    esi,0x601030
  400435:   55                      push   rbp
  400436:   48 81 ee 30 10 60 00    sub    rsi,0x601030
  # rsi = byte difference / 8 (arithmetic shift)
  40043d:   48 c1 fe 03             sar    rsi,0x3
  400441:   48 89 e5                mov    rbp,rsp
  # next four instructions: signed divide of rsi by 2, rounding toward zero
  # (add the sign bit, then arithmetic shift right by 1)
  400444:   48 89 f0                mov    rax,rsi
  400447:   48 c1 e8 3f             shr    rax,0x3f
  40044b:   48 01 c6                add    rsi,rax
  40044e:   48 d1 fe                sar    rsi,1
  # the je tests the zero flag set by the sar above
  400451:   74 15                   je     400468 <register_tm_clones+0x38>
  # function pointer is the constant 0 here, so this path also returns
  400453:   b8 00 00 00 00          mov    eax,0x0
  400458:   48 85 c0                test   rax,rax
  40045b:   74 0b                   je     400468 <register_tm_clones+0x38>
  # tail jump: restore rbp, pass 0x601030 in edi, jump instead of call
  40045d:   5d                      pop    rbp
  40045e:   bf 30 10 60 00          mov    edi,0x601030
  400463:   ff e0                   jmp    rax
  # padding
  400465:   0f 1f 00                nop    DWORD PTR [rax]
  # common exit
  400468:   5d                      pop    rbp
  400469:   c3                      ret    
  # padding up to the next function at 0x400470
  40046a:   66 0f 1f 44 00 00       nop    WORD PTR [rax+rax*1+0x0]

# __do_global_dtors_aux: run-once guard around deregister_tm_clones. The
# byte at 0x601030 is the "already done" flag: if it is non-zero the
# function returns immediately, otherwise it calls deregister_tm_clones
# and sets the flag to 1. It touches rsp only through the balanced
# push/pop of rbp around the call.
0000000000400470 <__do_global_dtors_aux>:
  400470:   80 3d b9 0b 20 00 00    cmp    BYTE PTR [rip+0x200bb9],0x0        # 601030 <__TMC_END__>
  # flag already set: skip to the return
  400477:   75 11                   jne    40048a <__do_global_dtors_aux+0x1a>
  400479:   55                      push   rbp
  40047a:   48 89 e5                mov    rbp,rsp
  40047d:   e8 6e ff ff ff          call   4003f0 <deregister_tm_clones>
  400482:   5d                      pop    rbp
  # mark as done so a second call takes the early exit above
  400483:   c6 05 a6 0b 20 00 01    mov    BYTE PTR [rip+0x200ba6],0x1        # 601030 <__TMC_END__>
  40048a:   f3 c3                   repz ret 
  # padding up to the next function at 0x400490
  40048c:   0f 1f 40 00             nop    DWORD PTR [rax+0x0]

# frame_dummy: every path ends in a tail jump to register_tm_clones. If the
# quadword at 0x600e20 is non-zero it first tests a function pointer, but
# that pointer is the constant 0 in this build, so the call at 4004ae is
# never reached. It touches rsp only through the balanced push/pop of rbp
# on that unreachable path.
0000000000400490 <frame_dummy>:
  400490:   bf 20 0e 60 00          mov    edi,0x600e20
  400495:   48 83 3f 00             cmp    QWORD PTR [rdi],0x0
  400499:   75 05                   jne    4004a0 <frame_dummy+0x10>
  # tail jump (0x40049b is also the target of the je at 4004a8)
  40049b:   eb 93                   jmp    400430 <register_tm_clones>
  # padding
  40049d:   0f 1f 00                nop    DWORD PTR [rax]
  # eax is the constant 0, so the je below is always taken
  4004a0:   b8 00 00 00 00          mov    eax,0x0
  4004a5:   48 85 c0                test   rax,rax
  4004a8:   74 f1                   je     40049b <frame_dummy+0xb>
  # not reached with eax = 0: call through rax (edi still 0x600e20), then
  # tail jump to register_tm_clones
  4004aa:   55                      push   rbp
  4004ab:   48 89 e5                mov    rbp,rsp
  4004ae:   ff d0                   call   rax
  4004b0:   5d                      pop    rbp
  4004b1:   e9 7a ff ff ff          jmp    400430 <register_tm_clones>
  # padding up to the next function at 0x4004c0
  4004b6:   66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  4004bd:   00 00 00 

# main: loop whose counter lives at [rbp-0x4], starting at 1 and running
# while it is <= 0x1e. Each iteration increments the global `sum`
# (0x601034), XORs together the six code bytes at 0x400514..0x400519 and
# calls address 0 if the result is non-zero. Returns `sum` in eax.
#
# Stack usage: there is no `sub rsp` in this function. After the prologue
# rsp == rbp, so the locals at [rbp-0x8] and [rbp-0x4] sit BELOW rsp, i.e.
# in the 128-byte System V red zone; nothing outside main reserves them.
#
# NOTE(review): the three pushes at 4004e6..4004e8 store to [rbp-0x8],
# [rbp-0x10] and [rbp-0x18]. The first one (push rax, 8 bytes at
# [rbp-0x8]) overwrites both locals, including the loop counter at
# [rbp-0x4]. The pops restore the registers but not the overwritten memory.
00000000004004c0 <main>:
  4004c0:   55                      push   rbp
  4004c1:   48 89 e5                mov    rbp,rsp
  # local at [rbp-0x8] = 0 (not read again in this listing)
  4004c4:   c7 45 f8 00 00 00 00    mov    DWORD PTR [rbp-0x8],0x0
  # loop counter at [rbp-0x4] = 1
  4004cb:   c7 45 fc 01 00 00 00    mov    DWORD PTR [rbp-0x4],0x1
  # enter the loop at its condition check
  4004d2:   eb 46                   jmp    40051a <.cend>
  # padding up to the loop body at 0x4004e0
  4004d4:   66 66 66 2e 0f 1f 84    data16 data16 nop WORD PTR cs:[rax+rax*1+0x0]
  4004db:   00 00 00 00 00 
  # loop body
  4004e0:   ff 05 4e 0b 20 00       inc    DWORD PTR [rip+0x200b4e]        # 601034 <sum>
  # save the three registers used by the byte check; these stores land in
  # the red zone that holds the locals (see NOTE(review) above)
  4004e6:   50                      push   rax
  4004e7:   53                      push   rbx
  4004e8:   56                      push   rsi
  # rax = running XOR (starts at 0), rsi = 0x400514 (.cstart)
  4004e9:   48 31 c0                xor    rax,rax
  4004ec:   48 c7 c6 14 05 40 00    mov    rsi,0x400514

# .cloop: rax ^= zero-extended byte at [rsi], for rsi in
# [0x400514, 0x40051a), i.e. the bytes of .cstart, .cslot and .end.
00000000004004f3 <.cloop>:
  4004f3:   48 0f b6 1e             movzx  rbx,BYTE PTR [rsi]
  4004f7:   48 31 d8                xor    rax,rbx
  4004fa:   48 ff c6                inc    rsi
  # stop when rsi reaches 0x40051a (.cend)
  4004fd:   48 81 fe 1a 05 40 00    cmp    rsi,0x40051a
  400504:   75 ed                   jne    4004f3 <.cloop>
  # XOR == 0: bytes are as expected, continue at .restore. For the bytes
  # shown in this listing: eb ^ 01 ^ ac ^ ff ^ 45 ^ fc = 0.
  400506:   48 83 f8 00             cmp    rax,0x0
  40050a:   74 05                   je     400511 <.restore>
  # mismatch: rax = 0, then call address 0 (presumably a deliberate crash
  # on tamper detection -- confirm with the author)
  40050c:   48 31 c0                xor    rax,rax
  40050f:   ff d0                   call   rax

# .restore: pop in reverse order of the pushes at 4004e6..4004e8.
0000000000400511 <.restore>:
  400511:   5e                      pop    rsi
  400512:   5b                      pop    rbx
  400513:   58                      pop    rax

# .cstart: first byte of the checked range; jumps over the 1-byte .cslot.
0000000000400514 <.cstart>:
  400514:   eb 01                   jmp    400517 <.end>

# .cslot: single byte 0xac that is never executed (jumped over above); the
# disassembler merely decodes it as lods. With this value the six checked
# bytes XOR to 0.
0000000000400516 <.cslot>:
  400516:   ac                      lods   al,BYTE PTR ds:[rsi]

# .end: increment the loop counter (last instruction of the checked range).
0000000000400517 <.end>:
  400517:   ff 45 fc                inc    DWORD PTR [rbp-0x4]

# .cend: loop condition -- repeat the body while counter <= 0x1e (signed).
000000000040051a <.cend>:
  40051a:   83 7d fc 1e             cmp    DWORD PTR [rbp-0x4],0x1e
  40051e:   7e c0                   jle    4004e0 <main+0x20>
  # return value: eax = sum
  400520:   8b 05 0e 0b 20 00       mov    eax,DWORD PTR [rip+0x200b0e]        # 601034 <sum>
  # rsp was never adjusted, so only rbp needs restoring
  400526:   5d                      pop    rbp
  400527:   c3                      ret    
  # padding up to the next function at 0x400530
  400528:   0f 1f 84 00 00 00 00    nop    DWORD PTR [rax+rax*1+0x0]
  40052f:   00 

# __libc_csu_init: calls _init, then calls every function pointer in the
# table from 0x600e10 up to (not including) 0x600e18 -- one 8-byte entry in
# this build, annotated __frame_dummy_init_array_entry. Each entry is
# called with the same edi, rsi and rdx this function received.
#
# Stack usage: six pushes of callee-saved registers plus `sub rsp,0x8`.
# On entry rsp is 8 modulo 16; after 6*8 = 48 bytes of pushes it is still
# 8 modulo 16, so the extra 8 bytes make rsp 16-byte aligned at the calls.
# The adjustment is undone at 400586 before returning, so it does not
# reserve any space for main.
0000000000400530 <__libc_csu_init>:
  400530:   41 57                   push   r15
  400532:   41 56                   push   r14
  # r15d = first argument (edi), kept across the calls below
  400534:   41 89 ff                mov    r15d,edi
  400537:   41 55                   push   r13
  400539:   41 54                   push   r12
  # r12 = start of the pointer table (0x600e10)
  40053b:   4c 8d 25 ce 08 20 00    lea    r12,[rip+0x2008ce]        # 600e10 <__frame_dummy_init_array_entry>
  400542:   55                      push   rbp
  # rbp = end of the pointer table (0x600e18)
  400543:   48 8d 2d ce 08 20 00    lea    rbp,[rip+0x2008ce]        # 600e18 <__init_array_end>
  40054a:   53                      push   rbx
  # r14 = second argument (rsi), r13 = third argument (rdx)
  40054b:   49 89 f6                mov    r14,rsi
  40054e:   49 89 d5                mov    r13,rdx
  # rbp = table size in bytes
  400551:   4c 29 e5                sub    rbp,r12
  # alignment padding only (see header); released at 400586
  400554:   48 83 ec 08             sub    rsp,0x8
  # rbp = number of 8-byte entries
  400558:   48 c1 fd 03             sar    rbp,0x3
  40055c:   e8 0f fe ff ff          call   400370 <_init>
  # empty table: skip the loop
  400561:   48 85 ed                test   rbp,rbp
  400564:   74 20                   je     400586 <__libc_csu_init+0x56>
  # rbx = index of the next entry to call
  400566:   31 db                   xor    ebx,ebx
  # padding so the loop head sits at 0x400570
  400568:   0f 1f 84 00 00 00 00    nop    DWORD PTR [rax+rax*1+0x0]
  40056f:   00 
  # loop: reload the saved arguments and call table[rbx]
  400570:   4c 89 ea                mov    rdx,r13
  400573:   4c 89 f6                mov    rsi,r14
  400576:   44 89 ff                mov    edi,r15d
  400579:   41 ff 14 dc             call   QWORD PTR [r12+rbx*8]
  40057d:   48 83 c3 01             add    rbx,0x1
  400581:   48 39 eb                cmp    rbx,rbp
  400584:   75 ea                   jne    400570 <__libc_csu_init+0x40>
  # epilogue: release the padding, restore registers in reverse push order
  400586:   48 83 c4 08             add    rsp,0x8
  40058a:   5b                      pop    rbx
  40058b:   5d                      pop    rbp
  40058c:   41 5c                   pop    r12
  40058e:   41 5d                   pop    r13
  400590:   41 5e                   pop    r14
  400592:   41 5f                   pop    r15
  400594:   c3                      ret    
  # padding up to the next function at 0x4005a0
  400595:   90                      nop
  400596:   66 2e 0f 1f 84 00 00    nop    WORD PTR cs:[rax+rax*1+0x0]
  40059d:   00 00 00 

# __libc_csu_fini: empty in this build -- it returns immediately and does
# not touch the stack. Its address (0x4005a0) is the value _start loads
# into r8 before calling __libc_start_main.
00000000004005a0 <__libc_csu_fini>:
  4005a0:   f3 c3                   repz ret 

Disassembly of section .fini:

# _fini: adjusts rsp down by 8 and immediately back up by 8, then returns;
# no other work is visible in this listing.
00000000004005a4 <_fini>:
  4005a4:   48 83 ec 08             sub    rsp,0x8
  4005a8:   48 83 c4 08             add    rsp,0x8
  # byte c3 is `ret` (same encoding as at 400421); the mnemonic column is
  # missing from this line of the listing
  4005ac:   c3
Shuzheng
  • 11,288
  • 20
  • 88
  • 186
  • 1
    What does the source code for `main` look like? It's hard to tell from just the disassembly. Also, there is no requirement for allocating space for local variables, they could all end up in registers in a small function - like `rax`, `rbx`, and `rsi`. – Bo Persson Apr 09 '17 at 11:52
  • Can you show us the original source code that generates this? The code suggests the compiler (GCC) was taking advantage of the [Linux 64-bit Red Zone](https://en.wikipedia.org/wiki/Red_zone_(computing)) but the peculiarity is that the code is not a leaf function (it does a `call rax`). My best guess is you have written incorrect inline assembly that doesn't account for red zone and you end up clobbering it yourself. It is the only reason I can see for code generated like this. IMHO this generated code is buggy and I can only attribute it to bad inline assembly. – Michael Petch Apr 09 '17 at 14:50
  • If you are in fact using inline assembly to call a function, then I have another [answer on Stackoverflow](http://stackoverflow.com/a/37503773/3857942) that shows the complexities of using inline assembler in 64-bit Linux code to call a function and accounts for the red zone. It isn't pretty. – Michael Petch Apr 09 '17 at 14:54
  • I've inserted binary code into a single for loop program using LLVM. The inline assembler inserted computes a simple function, but it uses three registers to do it, and these I want to save on the stack. However saving them on the stack may overwrite the enclosing functions local variables. Please ask for more details if it may help. – Shuzheng Apr 09 '17 at 15:02
  • What I've attempted is to insert tamper proofing code into a module to check the integrity of a basic block at runtime. – Shuzheng Apr 09 '17 at 15:03
  • Okay, so I'm not far off the mark. There is code being inserted outside the normal mechanism of _C_ generated code. I can tell you that your code is buggy. You've actually generated code from a compiler that took advantage of the red zone (no need to adjust _RSP_ if that data < 128 bytes). Your code actually clobbers that area with pushes. If you are expecting the compiler to not take advantage of the red zone then you may have to compile with something like `-mno-red-zone` . The generated code will look more along the lines of what you are probably expecting. – Michael Petch Apr 09 '17 at 15:09
  • The alternative is that your inserted assembly code (via LLVM) has to potentially be red zone aware. You'd have to consider skipping over the first 128 bytes (subtract 128 from _RSP_) before doing the pushes (and add adjust it after the POPs). – Michael Petch Apr 09 '17 at 15:12
  • Thanks - that was actually the solution I made up myself, i.e., subtracting 128. But I wondered whether that would always be on the safe side... what if the function needed more than 128 bytes of local variables... – Shuzheng Apr 09 '17 at 17:48
  • If red zone is enabled, does it apply globally to all functions? How do I see if it is enabled? I have other code also compiled using LLVM, but there I see functions subtracting from RSP, I think... – Shuzheng Apr 09 '17 at 18:00
  • By default in 64-bit Linux _GCC/CLANG_ will assume the red zone can be used when it is allowed. So you have to explicitly turn off red zone optimizations if you don't want them. The compiler can only avoid adjusting _RSP_ if the amount of local variable data <= 128 bytes AND if the function doesn't call other functions. Calling another function will clobber the stack data because the return address gets pushed. So any function where another function is called - you should find that _RSP_ explicitly adjusted. Same if you have > 128 bytes of local&temporary function data. – Michael Petch Apr 09 '17 at 23:48
  • Does the red zone also apply to Microsoft Visual C++ (64-bit)? – Shuzheng Apr 10 '17 at 11:45
  • Actually a function calling another function may also use the red zone, if it doesn't care about its local data? – Shuzheng Apr 10 '17 at 11:48

0 Answers0