6

I am reading Smashing the Stack for Fun and Profit (in particular, this post refers to the "Buffer Overflows" section). The article is written for a 32-bit machine; however, I am working on a 64-bit machine, which I account for in my examples. One particular example is causing some issues that I cannot explain. example3.c overwrites the return address in order to skip an instruction in the main function. Here is my code:

#include <stdio.h>

/*
 * Deliberately modifies its own saved return address so that the caller
 * resumes a few bytes later than it normally would. The parameters a, b
 * and c are not used in the body.
 */
void function(int a, int b, int c)
{
  char buf1[5];
  char buf2[10];
  int *retptr;

  /* Point 40 bytes past the start of buf2 (well outside its 10 bytes);
     the offset is chosen to land on the return-address slot for this
     particular compiler-generated stack layout. */
  retptr = (void*)(buf2 + 40);
  /* Add 8 to the value stored there. Because retptr is an int pointer,
     only an int-sized part of the slot is read and written. */
  (*retptr) += 8;
}

/*
 * Driver: sets x to 0, calls function(), sets x to 1 and prints x.
 * The experiment is whether function() can make the return skip "x = 1".
 */
int main(void)
{
  int x;

  x = 0;
  function(1,2,3);
  /* This is the assignment the modified return address is meant to skip. */
  x = 1;
  printf("%d\n", x);
  return 0;
}

I compile this program with gcc v4.8.2 with the following command:

gcc example3.c -o example3

Note that by default the gcc compiler appears to implement some stack protection such as address space layout randomisation and stack canaries. I have taken into account these safety measures in my calculation of the ret pointer value. Here is the corresponding assembly produced by gcc example3.c -S -fverbose-asm -o stack-protection.s:

    .file   "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
#   compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-stack-pro.s -fverbose-asm
# -fstack-protector -Wformat -Wformat-security
# options enabled:  -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstack-protector
# -fstrict-volatile-bitfields -fsync-libcalls -ftrapping-math
# -ftree-coalesce-vars -ftree-cselim -ftree-forwprop -ftree-loop-if-convert
# -ftree-loop-im -ftree-loop-ivcanon -ftree-loop-optimize
# -ftree-parallelize-loops= -ftree-phiprop -ftree-pta -ftree-reassoc
# -ftree-scev-cprop -ftree-slp-vectorize -ftree-vect-loop-version
# -funit-at-a-time -funwind-tables -fverbose-asm -fzero-initialized-in-bss
# -m128bit-long-double -m64 -m80387 -maccumulate-outgoing-args
# -malign-stringops -mfancy-math-387 -mfp-ret-in-387 -mfxsr -mglibc
# -mieee-fp -mlong-double-80 -mmmx -mno-sse4 -mpush-args -mred-zone -msse
# -msse2 -mtls-direct-seg-refs

    .text
    .globl  function
    .type   function, @function
# function(a, b, c), stack-protector build.
# Frame as used below (offsets from %rbp):
#    8(%rbp)  return address (pushed by the caller's call)
#    0(%rbp)  saved %rbp
#   -8(%rbp)  copy of the stack guard read from %fs:40
#  -32(%rbp)  base used for buf2 in the "buf2 + 40" computation
#  -40(%rbp)  retptr
#  -52/-56/-60(%rbp)  spilled arguments a, b, c
function:
.LFB0:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    # Reserve 64 bytes of locals and spill the three int arguments.
    subq    $64, %rsp   #,
    movl    %edi, -52(%rbp) # a, a
    movl    %esi, -56(%rbp) # b, b
    movl    %edx, -60(%rbp) # c, c
    # Store the guard value in the frame, then clear the register copy.
    movq    %fs:40, %rax    #, tmp65
    movq    %rax, -8(%rbp)  # tmp65, D.2197
    xorl    %eax, %eax  # tmp65
    # retptr = buf2 + 40: -32(%rbp) + 40 = 8(%rbp), the return-address slot.
    leaq    -32(%rbp), %rax #, tmp61
    addq    $40, %rax   #, tmp62
    movq    %rax, -40(%rbp) # tmp62, ret
    # (*retptr) += 8, done as a 32-bit load, add and 32-bit store.
    movq    -40(%rbp), %rax # ret, tmp63
    movl    (%rax), %eax    # *ret_1, D.2195
    leal    8(%rax), %edx   #, D.2195
    movq    -40(%rbp), %rax # ret, tmp64
    movl    %edx, (%rax)    # D.2195, *ret_1
    # Guard check: the xor leaves %rax == 0 when the saved copy still
    # matches %fs:40; otherwise __stack_chk_fail is called.
    movq    -8(%rbp), %rax  # D.2197, tmp66
    xorq    %fs:40, %rax    #, tmp66
    je  .L2 #,
    call    __stack_chk_fail    #
.L2:
    # On this path %rax is 0 when control returns to the caller.
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   function, .-function
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
# main(), stack-protector build. x lives at -4(%rbp).
main:
.LFB1:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    $16, %rsp   #,
    # x = 0
    movl    $0, -4(%rbp)    #, x
    # function(1, 2, 3): arguments go in %edi, %esi, %edx.
    movl    $3, %edx    #,
    movl    $2, %esi    #,
    movl    $1, %edi    #,
    call    function    #
    # x = 1: the normal return address is the start of this instruction,
    # and this is the store the experiment tries to skip.
    movl    $1, -4(%rbp)    #, x
    # printf("%d\n", x): %esi = x, %edi = format string, %eax = 0
    # (no vector registers used for this variadic call).
    movl    -4(%rbp), %eax  # x, tmp61
    movl    %eax, %esi  # tmp61,
    movl    $.LC0, %edi #,
    movl    $0, %eax    #,
    call    printf  #
    # return 0
    movl    $0, %eax    #, D.2200
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
    .section    .note.GNU-stack,"",@progbits

Executing example3 has the desired effect of skipping the second assignment to x and the program outputs 0.

However, if instead I compile using the -fno-stack-protector option:

gcc -fno-stack-protector example3.c -S -fverbose-asm -o no-stack-protection.s

I receive the following assembly file:

    .file   "example3.c"
# GNU C (Ubuntu 4.8.2-19ubuntu1) version 4.8.2 (x86_64-linux-gnu)
#   compiled by GNU C version 4.8.2, GMP version 5.1.3, MPFR version 3.1.2-p3, MPC version 1.0.1
# GGC heuristics: --param ggc-min-expand=100 --param ggc-min-heapsize=131072
# options passed:  -imultiarch x86_64-linux-gnu example3.c -mtune=generic
# -march=x86-64 -auxbase-strip verbose-no-stack-pro.s -fno-stack-protector
# -fverbose-asm -Wformat -Wformat-security
# options enabled:  -faggressive-loop-optimizations
# -fasynchronous-unwind-tables -fauto-inc-dec -fbranch-count-reg -fcommon
# -fdelete-null-pointer-checks -fdwarf2-cfi-asm -fearly-inlining
# -feliminate-unused-debug-types -ffunction-cse -fgcse-lm -fgnu-runtime
# -fident -finline-atomics -fira-hoist-pressure -fira-share-save-slots
# -fira-share-spill-slots -fivopts -fkeep-static-consts
# -fleading-underscore -fmath-errno -fmerge-debug-strings
# -fmove-loop-invariants -fpeephole -fprefetch-loop-arrays
# -freg-struct-return -fsched-critical-path-heuristic
# -fsched-dep-count-heuristic -fsched-group-heuristic -fsched-interblock
# -fsched-last-insn-heuristic -fsched-rank-heuristic -fsched-spec
# -fsched-spec-insn-heuristic -fsched-stalled-insns-dep -fshow-column
# -fsigned-zeros -fsplit-ivs-in-unroller -fstrict-volatile-bitfields
# -fsync-libcalls -ftrapping-math -ftree-coalesce-vars -ftree-cselim
# -ftree-forwprop -ftree-loop-if-convert -ftree-loop-im -ftree-loop-ivcanon
# -ftree-loop-optimize -ftree-parallelize-loops= -ftree-phiprop -ftree-pta
# -ftree-reassoc -ftree-scev-cprop -ftree-slp-vectorize
# -ftree-vect-loop-version -funit-at-a-time -funwind-tables -fverbose-asm
# -fzero-initialized-in-bss -m128bit-long-double -m64 -m80387
# -maccumulate-outgoing-args -malign-stringops -mfancy-math-387
# -mfp-ret-in-387 -mfxsr -mglibc -mieee-fp -mlong-double-80 -mmmx -mno-sse4
# -mpush-args -mred-zone -msse -msse2 -mtls-direct-seg-refs

    .text
    .globl  function
    .type   function, @function
# function(a, b, c), -fno-stack-protector build.
# %rsp is not adjusted here; locals are addressed at negative offsets
# from %rbp:
#    8(%rbp)  return address (pushed by the caller's call)
#    0(%rbp)  saved %rbp
#   -8(%rbp)  retptr
#  -32(%rbp)  base used for buf2 in the "buf2 + 40" computation
#  -36/-40/-44(%rbp)  spilled arguments a, b, c
function:
.LFB0:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    movl    %edi, -36(%rbp) # a, a
    movl    %esi, -40(%rbp) # b, b
    movl    %edx, -44(%rbp) # c, c
    # retptr = buf2 + 40: -32(%rbp) + 40 = 8(%rbp), the return-address slot.
    leaq    -32(%rbp), %rax #, tmp61
    addq    $40, %rax   #, tmp62
    movq    %rax, -8(%rbp)  # tmp62, ret
    # (*retptr) += 8, done as a 32-bit load, add and 32-bit store.
    movq    -8(%rbp), %rax  # ret, tmp63
    movl    (%rax), %eax    # *ret_1, D.2195
    leal    8(%rax), %edx   #, D.2195
    movq    -8(%rbp), %rax  # ret, tmp64
    movl    %edx, (%rax)    # D.2195, *ret_1
    # Nothing writes %rax after this point, so it still holds retptr
    # (the address of the return slot) when control returns to the caller.
    popq    %rbp    #
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE0:
    .size   function, .-function
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
# main(), -fno-stack-protector build. x lives at -4(%rbp).
main:
.LFB1:
    .cfi_startproc
    pushq   %rbp    #
    .cfi_def_cfa_offset 16
    .cfi_offset 6, -16
    movq    %rsp, %rbp  #,
    .cfi_def_cfa_register 6
    subq    $16, %rsp   #,
    # x = 0
    movl    $0, -4(%rbp)    #, x
    # function(1, 2, 3): arguments go in %edi, %esi, %edx.
    movl    $3, %edx    #,
    movl    $2, %esi    #,
    movl    $1, %edi    #,
    call    function    #
    # x = 1: the normal return address is the start of this instruction,
    # and this is the store the experiment tries to skip.
    movl    $1, -4(%rbp)    #, x
    # printf("%d\n", x): %esi = x, %edi = format string, %eax = 0
    # (no vector registers used for this variadic call).
    movl    -4(%rbp), %eax  # x, tmp61
    movl    %eax, %esi  # tmp61,
    movl    $.LC0, %edi #,
    movl    $0, %eax    #,
    call    printf  #
    # return 0
    movl    $0, %eax    #, D.2196
    leave
    .cfi_def_cfa 7, 8
    ret
    .cfi_endproc
.LFE1:
    .size   main, .-main
    .ident  "GCC: (Ubuntu 4.8.2-19ubuntu1) 4.8.2"
    .section    .note.GNU-stack,"",@progbits

and the corresponding executable does not produce the desired value of 0 but a random value which I cannot reconcile with the assembly file.

My mental picture of the stack frame in the -fno-stack-protector case is (sfp = saved frame pointer, ret = return address):

low memory address     buf2 (16 bytes)  buf1 (8 bytes)  retptr (8 bytes)  sfp (8 bytes) ret       high memory address
<---                  [              ][              ][                ][             ][    ] ...
top of stack                                                                                      bottom of stack

My Question:

Am I miscalculating the position of the return address in the unprotected case?

incomplete
  • 63
  • 5
  • Compile with `-S -fverbose-asm` and possibly also with `-O` – Basile Starynkevitch May 21 '15 at 08:40
  • I have updated my question to include the output from `-fverbose-asm`. `-O` seems to obliterate the output such that the no stack protection version has no `function`. I cannot see from the additional comments the option places in the assembly file what is going wrong. The two versions both seem to be performing the same operations on the `retptr` variable. – incomplete May 21 '15 at 09:12
  • The best bet to figure something like this out is to single step through the code so you can watch register/memory values. It's been awhile since I did asm, but there's a sub qword instruction on the rsp register that doesn't exist in the non-protected version. – Daniel Rudy May 21 '15 at 09:24
  • 1
    My guess would be that you are ignoring the variables that GCC adds to implement stack protection. One implementation shown here[1] uses extra local variables in a function, which disturbs the view of stack you have. [1][http://wiki.osdev.org/Stack_Smashing_Protector] Also, then you might say that you should see smashing in the `-fno-stack-protector` case not in the former one. Well, I think you have done some mistake in visualizing the stack in the former case, because I do not get the output i.e. `0` in the former case on ideone (http://ideone.com/dRVgZ2) – Nishant May 21 '15 at 10:05
  • 1
    @Nishant Regarding stack protection, I have explicitly calculated for extra variables being added as can be seen in the stack-protection.s assembly file; the canaries are added before the buffers but it makes no difference to my calculation of the return address since the `retptr` in the protected case replaces the canary. I think ideone.com is inadmissible here; no matter what value you add to the buffer (to compute `retptr`) it compiles successfully and outputs `1`, when it should instead result in a memory violation, suggesting aggressive optimisation. – incomplete May 21 '15 at 22:02
  • @incomplete Nice observation about ideone. Though if `retptr` replaces the canary in the protected case, then the program should exit after `function` returns instead of printing any output. Otherwise, you have to be sure that the canary value is the address of the instruction `x = 1` which seems unlikely. – Nishant May 22 '15 at 06:13

1 Answer

2

Am I miscalculating the position of the return address in the unprotected case?

That part is correct, at least as long as the address fits in an int. On x86-64 the correct type for retptr would be long *, so that dereferencing it reads and updates the full 64-bit return address rather than only its low 32 bits.

You could double check that by running the following program:

#include <stdio.h>

/*
 * Diagnostic variant of the question's function(): prints the value found
 * at buf2 + 40 before adding 8 to it. The parameters a, b and c are unused.
 */
void function(int a, int b, int c)
{
  char buf1[5];
  char buf2[10];
  int *retptr;

  /* Point 40 bytes past the start of buf2, where the return address is
     expected to be stored for this stack layout. */
  retptr = (void*)(buf2 + 40);
  /* Show the int currently stored at retptr, widened to long and printed
     with %p so it can be compared with the address printed by main(). */
  printf("retptr points to: %p\n", (long*)(long)*retptr);
  /* Add 8 to the stored value; only an int-sized part is modified. */
  (*retptr) += 8;
}

/*
 * Diagnostic variant of the question's main(): prints the address of the
 * statement that follows the call to function(), for comparison with the
 * value function() prints.
 */
int main(void)
{
  int x;


  /* &&label takes the address of a label (GNU C "labels as values"
     extension); label marks the statement right after the call below. */
  printf("ret address is %p\n", &&label);
  x = 0;
  function(1,2,3);
label:
  x = 1;
  printf("%d\n", x);

  return 0;
}

By running this, you should be able to confirm that the address of the instruction right after the call to function() in main is the same value that retptr points to.

I believe that the reason you're not getting the expected 0, lies in this line:

(*retptr) += 8;

On my 64 bit system, x = 1 is compiled as:

  40058a:   c7 45 fc 01 00 00 00    movl   $0x1,-0x4(%rbp)
  400591:   8b 45 fc                mov    -0x4(%rbp),%eax
  400594:   89 c6                   mov    %eax,%esi

The first line stores 1 in x and the two other lines pass the value of x as an argument to printf(). Notice that the first instruction is 7 bytes long, not 8. If you change the increment to 7, you should see 0, as you expected.

Effectively, by adding 8, the ret instruction has set up the instruction pointer to point to 45 (the second byte of the following mov), rather than 8b. That code then becomes:

  45 fc                 rex.RB cld 
  89 c6                 mov    %eax,%esi

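I'm not entirely sure what happens at that point, and I suspect that depends on the CPU model. Mine appears to skip the instructions until mov %eax,%esi, and so printf displays the value of whatever %eax was. If you look at the disassembly of function(), it turns out that %rax is used to store the value of retptr, and that's the seemingly random value that gets printed. In the stack-protector build, the last instruction that writes %rax before returning is the canary check xorq %fs:40, %rax, which leaves 0 when the canary is intact, so the same misaligned jump happens to print 0 there.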

Frederik Deweerdt
  • 4,943
  • 2
  • 29
  • 31
  • Ah, my bad for not using `long`. It appears that GCC allocates 8 bytes for the pointer anyway. I'm slightly confused about the instruction length being 7 bytes and why the 8-byte increment worked in the protected case. How do you obtain the layout of the instructions that implement `x = 1`? I used GDB's disassemble command but I don't get the same level detail as you show here. One point: "...the `ret` instruction has setup the instruction pointer to point to `45`, rather than `c7`", should that read `8b` instead of `c7`? Final question: How do you obtain the final assembly code snippet? – incomplete May 23 '15 at 15:03
  • 1
    I've used `objdump -D` on the executable in order to get the disassembly. Regarding `c7` rather than `8b`, I did mean `c7`: when jumping to `c7 45 fc 01 00 00 00`, because of the extra byte added to the return address, we end up jumping to `45 fc 01 00 00...`. Regarding the final code snippet, I wrote a small program with: `char asm_snippet[]={0x45, 0xfc, 0x01, 0x00, 0x00 ...`. and then used `objdump -D`. You could also use `x /32i asm_snippet` in gdb. I learned this from here: https://lkml.org/lkml/2008/1/7/406 – Frederik Deweerdt May 23 '15 at 15:53
  • Isn't the idea that we want to jump past the instruction `c7 45 fc 01 00 00 00`? So, adding 7 bytes would take us to `8b 45 fc` and adding an extra byte would cause us to jump past `8b` to `45 fc`? – incomplete May 24 '15 at 12:57