5

I was just playing with the call stack, trying to change the return address of a function etc, and wound up writing this program in C:

#include<stdio.h>

void trace(int);
void func3(int);
void func2(int);
void func1(int);

/* Entry point: stores the value 0xAAAA1111 in a local and starts the
 * func1 -> func2 -> func3 -> trace call chain. trace() ends its dump once it
 * has printed a word equal to 0xAAAA1111. Returns 0. */
int main(){

    int a = 0xAAAA1111; /* never read by name; trace() compares memory words against this value */

    func1(0xFCFCFC01);  /* distinctive constant passed as the argument */

    return 0;

}

/* First link of the call chain: initialises one local with the constant
 * 0xBBBB1111 and calls func2(0xFCFCFC02). The parameter `a` is not used. */
void func1(int a){

    int loc = 0xBBBB1111; /* written but never read */

    func2(0xFCFCFC02);

}

/* Second link of the call chain: initialises two locals with the constants
 * 0xCCCC1111 and 0xCCCC2222 and calls func3(0xFCFCFC03). The parameter `a`
 * is not used. */
void func2(int a){

    int loc1 = 0xCCCC1111; /* written but never read */
    int loc2 = 0xCCCC2222; /* written but never read */

    func3(0xFCFCFC03);

}

/* Third link of the call chain: initialises three locals with the constants
 * 0xDDDD1111, 0xDDDD2222 and 0xDDDD3333 and calls trace(0xFCFCFC04). The
 * parameter `a` is not used. */
void func3(int a){

    int loc1 = 0xDDDD1111; /* written but never read */
    int loc2 = 0xDDDD2222; /* written but never read */
    int loc3 = 0xDDDD3333; /* written but never read */

    trace(0xFCFCFC04);

}

/* Prints memory one int at a time, starting at the address of the local `loc`
 * and moving towards higher addresses, and stops after printing a word equal
 * to 0xAAAA1111 (the value main() stores in its local). The parameter `a` is
 * not used.
 *
 * Every read after the first goes beyond the `loc` object, so the behaviour is
 * not defined by the C standard; the result depends on how the compiler lays
 * out the stack. */
void trace(int a){

    int loc = 0xEEEE1111;

    int *ptr = &loc; /* the dump starts at this function's own local */

    do {
    /* NOTE(review): the format string has two conversions but three arguments
     * are passed, so the last *ptr is never printed. ptr is an int* printed
     * with %08X, which is a type mismatch; %p (or a cast to an unsigned
     * integer type) would be the strictly correct form. */
    printf("0x%08X : %08X\n", ptr, *ptr, *ptr);
    } while(*(ptr++) != 0xAAAA1111); /* test the word just printed, then advance to the next int */

}

(sorry for the length)

This produced the following output (with comments that I added):

0xBF8144D4 : EEEE1111 //local int in trace
0xBF8144D8 : BF8144F8 //beginning of trace stack frame
0xBF8144DC : 0804845A //return address for trace to func3
0xBF8144E0 : FCFCFC04 //int passed to trace
0xBF8144E4 : 08048230 //(possibly) uninitialized padding
0xBF8144E8 : 00000000 //padding
0xBF8144EC : DDDD3333 //local int in func3
0xBF8144F0 : DDDD2222 //local int in func3
0xBF8144F4 : DDDD1111 //local int in func3
0xBF8144F8 : BF814518 //beginning of func3 stack frame
0xBF8144FC : 08048431 //return address for func3 to func2
0xBF814500 : FCFCFC03 //parameter passed to func3
0xBF814504 : 00000000 //padding
0xBF814508 : 00000000 //padding
0xBF81450C : 00000000 //padding
0xBF814510 : CCCC2222 //local int in func2
0xBF814514 : CCCC1111 //local int in func2
0xBF814518 : BF814538 //beginning of func2 stack frame
0xBF81451C : 0804840F //return address for func2 to func1
0xBF814520 : FCFCFC02 //parameter passed to func2
0xBF814524 : 00000000 //padding
0xBF814528 : BF816728 //uninitialized padding
0xBF81452C : B7DF3F4E //uninitialized padding
0xBF814530 : B7EA61D9 //uninitialized padding
0xBF814534 : BBBB1111 //local int in func1
0xBF814538 : BF814558 //beginning of func1 stack frame
0xBF81453C : 080483E8 //return address for func1 to main
0xBF814540 : FCFCFC01 //parameter passed to func1
0xBF814544 : 08049FF4 //(maybe) padding
0xBF814548 : BF814568 //(maybe) padding
0xBF81454C : 080484D9 //(maybe) padding
0xBF814550 : AAAA1111 //local int in main

I was wondering if anybody could fill me in on the blank spots here... I'm running Ubuntu linux compiling with gcc 4.3.3 (on an x86 -- AMD Turion 64)

What are the 0804... numbers? What's the third address up from the bottom? Is that the return address for main? If so, why is it out of order compared to the rest of the stack? The 0x0804 numbers are return addresses, or pointers to code/data or something, while the 0xBF814 numbers are stack pointers

What's this:

0xBF814524 : 00000000 //padding?
0xBF814528 : BF816728 //I have no idea
0xBF81452C : B7DF3F4E //????
0xBF814530 : B7EA61D9 //????

seen just after the local int in func1?

Okay I have my stack dump almost completely filled in.

It looks like the compiler wants to have the parameters pushed onto the stack starting at a 0x.......0 address, and everything between the local variables from the function before and the first parameter of the function being called seems to be padding (whether 0x00000000 or some uninitialized value). Some of them I'm unsure about because they look like code/data segment pointers, but I can't see them being used in the code: they're just there when the stack pointer gets reduced.

and I know it's a HUGE nono to touch the call stack in any kind of project, but that's okay. It's fun, right?

also

Greg wants to see the assembly, here it is

    .file   "stack.c"
    .text
# main (GCC output, 32-bit x86, AT&T syntax).
# Realigns the stack to 16 bytes, stores 0xAAAA1111 in its local, calls
# func1(0xFCFCFC01) and returns 0 in %eax.
.globl main
    .type   main, @function
main:
    leal    4(%esp), %ecx              # ecx = entry esp + 4 (address just above the return address)
    andl    $-16, %esp                 # round esp down to a 16-byte boundary
    pushl   -4(%ecx)                   # push a copy of the word at ecx-4, i.e. the return address
    pushl   %ebp                       # save the caller's frame pointer
    movl    %esp, %ebp                 # ebp = base of main's frame
    pushl   %ecx                       # keep ecx at -4(%ebp) so the entry esp can be rebuilt on exit
    subl    $20, %esp                  # reserve 20 bytes; only -8(%ebp) and (%esp) are written below
    movl    $-1431695087, -8(%ebp)     # local a = 0xAAAA1111
    movl    $-50529279, (%esp)         # outgoing argument = 0xFCFCFC01
    call    func1
    movl    $0, %eax                   # return value 0
    addl    $20, %esp                  # release the reserved area
    popl    %ecx                       # reload the value saved by pushl %ecx
    popl    %ebp                       # restore the caller's frame pointer
    leal    -4(%ecx), %esp             # esp = ecx - 4: undo the realignment
    ret
    .size   main, .-main
# func1 (GCC output): stores 0xBBBB1111 in its local and calls func2(0xFCFCFC02).
.globl func1
    .type   func1, @function
func1:
    pushl   %ebp                       # save the caller's frame pointer
    movl    %esp, %ebp                 # ebp = base of this frame
    subl    $24, %esp                  # reserve 24 bytes; with saved ebp + return address the frame is 32 bytes.
                                       # Only -4(%ebp) and (%esp) are written, the rest keeps its old contents.
    movl    $-1145368303, -4(%ebp)     # local loc = 0xBBBB1111
    movl    $-50529278, (%esp)         # outgoing argument = 0xFCFCFC02
    call    func2
    leave                              # esp = ebp, then pop ebp
    ret
    .size   func1, .-func1
# func2 (GCC output): stores 0xCCCC1111 and 0xCCCC2222 in its locals and
# calls func3(0xFCFCFC03).
.globl func2
    .type   func2, @function
func2:
    pushl   %ebp                       # save the caller's frame pointer
    movl    %esp, %ebp                 # ebp = base of this frame
    subl    $24, %esp                  # reserve 24 bytes; with saved ebp + return address the frame is 32 bytes.
                                       # Slots that are not written below keep their old contents.
    movl    $-859041519, -4(%ebp)      # local loc1 = 0xCCCC1111
    movl    $-859037150, -8(%ebp)      # local loc2 = 0xCCCC2222
    movl    $-50529277, (%esp)         # outgoing argument = 0xFCFCFC03
    call    func3
    leave                              # esp = ebp, then pop ebp
    ret
    .size   func2, .-func2
# func3 (GCC output): stores 0xDDDD1111, 0xDDDD2222 and 0xDDDD3333 in its
# locals and calls trace(0xFCFCFC04).
.globl func3
    .type   func3, @function
func3:
    pushl   %ebp                       # save the caller's frame pointer
    movl    %esp, %ebp                 # ebp = base of this frame
    subl    $24, %esp                  # reserve 24 bytes; with saved ebp + return address the frame is 32 bytes.
                                       # Slots that are not written below keep their old contents.
    movl    $-572714735, -4(%ebp)      # local loc1 = 0xDDDD1111
    movl    $-572710366, -8(%ebp)      # local loc2 = 0xDDDD2222
    movl    $-572705997, -12(%ebp)     # local loc3 = 0xDDDD3333
    movl    $-50529276, (%esp)         # outgoing argument = 0xFCFCFC04
    call    trace
    leave                              # esp = ebp, then pop ebp
    ret
    .size   func3, .-func3
    # Read-only data: the printf format string used by trace.
    .section    .rodata
.LC0:
    .string "0x%08X : %08X\n"
    .text
# trace (GCC output): prints successive 32-bit words starting at its own local
# (-4(%ebp)) and moving to higher addresses, until the word just printed
# equals 0xAAAA1111.
#   -4(%ebp) : loc (0xEEEE1111)
#   -8(%ebp) : ptr (address of the word to print next)
.globl trace
    .type   trace, @function
trace:
    pushl   %ebp                       # save the caller's frame pointer
    movl    %esp, %ebp                 # ebp = base of this frame
    subl    $40, %esp                  # reserve 40 bytes: two locals plus four outgoing argument slots
    movl    $-286387951, -4(%ebp)      # loc = 0xEEEE1111
    leal    -4(%ebp), %eax
    movl    %eax, -8(%ebp)             # ptr = &loc
.L10:                                  # top of the do/while loop
    movl    -8(%ebp), %eax
    movl    (%eax), %edx               # edx = *ptr (fourth printf argument)
    movl    -8(%ebp), %eax
    movl    (%eax), %eax               # eax = *ptr (third printf argument)
    movl    %edx, 12(%esp)
    movl    %eax, 8(%esp)
    movl    -8(%ebp), %eax
    movl    %eax, 4(%esp)              # second printf argument = ptr
    movl    $.LC0, (%esp)              # first printf argument = format string
    call    printf
    movl    -8(%ebp), %eax
    movl    (%eax), %eax               # reload *ptr; eax was overwritten by the call's return value
    cmpl    $-1431695087, %eax         # compare *ptr with 0xAAAA1111
    setne   %al                        # al = 1 if different; captured before ptr is advanced
    addl    $4, -8(%ebp)               # ptr++ (advance by one 4-byte int)
    testb   %al, %al
    jne .L10                           # repeat while the printed word was not 0xAAAA1111
    leave                              # esp = ebp, then pop ebp
    ret
    .size   trace, .-trace
    .ident  "GCC: (Ubuntu 4.3.3-5ubuntu4) 4.3.3"
    .section    .note.GNU-stack,"",@progbits
Carson Myers
  • 37,678
  • 39
  • 126
  • 176
  • 1
    compile your code to assembly and peek at the output, gcc -S myfile.c -o myfile.s – nos Aug 03 '09 at 19:29
  • on x86, updated my question. I have peeked at the assembly but it does this: movl $-859041519, -4(%ebp), with decimal numbers instead of the easy-to-recognize hex, and I mean, it's otherwise really hard to follow the stack – Carson Myers Aug 03 '09 at 19:32
  • There can be several things put on the stack. Exception-Record addresses, temporary values, cookies to check for a stack overflow. You have to look at the assembly to get a glimpse for what these values are used. – Christopher Aug 03 '09 at 19:37
  • well I don't think it's exception record addresses, I didn't use any temporaries and the assembly doesn't appear to be using canaries... I think most of the weird stuff is just uninitialized padding, since I can't find any reference to it in the assembly, so it must just *happen* to be in the memory. – Carson Myers Aug 03 '09 at 20:07
  • That's probably the frame pointer, http://en.wikipedia.org/wiki/Call_stack should have an explanation – nos Aug 03 '09 at 20:08
  • haha, I know it's just wikipedia, but it makes you feel thorough and well versed when someone posts a link to some reference, and it's highlighted as visited. – Carson Myers Aug 03 '09 at 20:15

7 Answers

5

Inspecting the stack like this seems like one step too far away. I might suggest loading your program in a debugger, switching to the assembly language view, and single stepping through every machine instruction. Understanding of the CPU stack necessarily requires an understanding of the machine instructions operating on it, and this will be a more direct way to see what's going on.

As others mentioned, the structure of the stack is also highly dependent on the processor architecture you're working with.

Greg Hewgill
  • 951,095
  • 183
  • 1,149
  • 1,285
  • well I do (mostly) understand assembly and how the stack is set up--I was just wondering what the extra data might be *for*, since examining the assembly would mostly just let me look at the instructions that put it there. And I'm working on x86 on my laptop which is usually the case for PC's (as far as I know) – Carson Myers Aug 03 '09 at 19:35
  • If you can identify the instructions that put the data there, then we will have a much better chance of being able to tell you what they're for. Sometimes the data on the stack is just up to the whim of the compiler, and the data won't make sense without the context of the code itself. – Greg Hewgill Aug 03 '09 at 19:38
  • well for those three values on top of 0xBBBB1111 I can find no reference to them in the assembly. The 0x0 values it seems are for stack alignment, and the rest (aside from a few code segment references) I seem to have figured out... But still there are a few values I'm not sure about – Carson Myers Aug 03 '09 at 19:57
  • I added the assembly, but it's pretty straight-forward – Carson Myers Aug 03 '09 at 20:11
  • Thanks. It's hard to say for sure, but `subl $24, %esp` is where most of that space above 0xBBBB1111 is going (it's uninitialised so just happens to contain whatever was there before). The compiler doesn't generate any code to put anything there (yet), so it may be a scratch area for exception handling or something. Each of your functions seems to allocate about the same minimum amount of space, which means it may be something common to all function definitions. – Greg Hewgill Aug 03 '09 at 20:24
  • Looks like laalto has it. I didn't know gcc had a default alignment option! Learn something new every day. – Greg Hewgill Aug 03 '09 at 20:34
5

Most likely those are stack canaries. Your compiler adds code to push additional data to the stack and read it back afterwards to detect stack overflows.

rpetrich
  • 32,196
  • 6
  • 66
  • 89
  • I thought that's what they might be, but I thought those were for security -- also, when I was trying to over-write the return address for a function earlier today, I had to play with the numbers and lengths of strings to get it right and probably wrote over those many times, and all that happened was that it would have a segfault, work, or just return normally – Carson Myers Aug 03 '09 at 19:37
5

I'm guessing those values starting with 0x0804 are addresses in your program's code segment (like return addresses for function calls). The ones starting with 0xBF814 that you've labeled as return addresses are addresses on the stack -- data, not code. I'm guessing they're probably frame pointers.

Nick Meyer
  • 39,212
  • 14
  • 67
  • 75
  • you're right, I printed the addresses of the functions and they began with 0x0804. The 0xBF814 are probably frame pointers as you said (which might explain why they all point to each other?) – Carson Myers Aug 03 '09 at 19:40
3

As already pointed out, the 0xBF... are frame pointers and 0x08... return addresses.

The padding is due to alignment issues. Other unrecognized values are also padding as the stack is not initialized to zero or any other value. Uninitialized variables and unused padding space will contain whatever bytes are in those memory locations.

Community
  • 1
  • 1
laalto
  • 150,114
  • 66
  • 286
  • 303
2

The 0xBF... addresses will be links to the previous stack frame:

0xBF8144D8 : BF8144F8 //return address for trace
0xBF8144DC : 0804845A //

0xBF8144F8 : BF814518 //return address for func3
0xBF8144FC : 08048431 //????

0xBF814518 : BF814538 //return address for func2?
0xBF81451C : 0804840F //????

0xBF814538 : BF814558 //return address for func1
0xBF81453C : 080483E8 //????

The 0x08... addresses will be the addresses of the code to return to in each case.

I can't speak for the other stuff on the stack; you would have to step through the assembly language and see exactly what it is doing. I guess that it is aligning the start of each frame to a specific alignment so that __attribute__((aligned)) (or whatever it's called these days...) works.

2

The compiler uses EBP to store the frame's base address. It's been a while since I looked at this, so I may get the details a bit wrong, but the idea is like this.

You have three steps when calling a function:

  1. The caller pushes the function's parameters onto the stack.
  2. The caller uses the call instruction, which pushes the return address onto the stack, and jumps to the new function.
  3. The called function pushes EBP onto the stack, and copies ESP into EBP:
    • (Note: well behaved functions will also push all the GPRs onto the stack with PUSHAD)
push EBP
mov EBP, ESP

When the function returns it:

  1. pops EBP
  2. executes the ret instruction, which pops off the return address and jumps there.
pop EBP
ret

The question is, why is EBP pushed, and why does ESP get copied into it?

When you enter the function ESP points to the lowest point on the stack for this function. Any variables you declare on the stack can be accessed as [ESP + offset_to_variable]. This is easy! But note that ESP must always point to the top of the stack, so when you declare a new variable on the stack, ESP changes. Now [ESP + offset_to_variable] isn't so great, because you have to remember what ESP was at the time the variable was allocated.

Instead of doing that, the first thing the function needs to do is to copy ESP into EBP. EBP won't change during the life of the function, so you can access all variables using `[EBP + offset_to_variable]`. But now you have another problem, because if the called function calls another function, EBP will be overwritten. That's why before copying EBP it needs to be saved onto the stack, so that it can be restored before returning to the calling function.

Nathan Fellman
  • 122,701
  • 101
  • 260
  • 319
1

Is this a debug or release build? I'd expect some padding with the debug builds for detecting Stack Overflows.

AShelly
  • 34,686
  • 15
  • 91
  • 152
  • I'm not sure what you mean, I didn't use a debug flag during compilation and I just used gcc via command line. Compiled with `gcc stack.c -o stack`. – Carson Myers Aug 03 '09 at 19:33
  • GCC may still be including its stack-smashing protection. Try compiling with the -fno-stack-protector switch and see if you still get the weird values. – Tyler McHenry Aug 03 '09 at 19:35
  • the padding is still there. I suspect it has to do with stack alignment, but I didn't think it did THAT much alignment. – Carson Myers Aug 03 '09 at 19:48
  • actually, it looks like it wants to put the parameters starting at a 0x......0 address, and the padding does just that – Carson Myers Aug 03 '09 at 19:59