-2

I have a task that requires me to read a string (a `char*`) with `scanf` in assembly. I tried this code:

.data
INPUT_STRING:   .string "Give me a string: "    # prompt passed to the first printf
SCANF_STRING:   .string "%s"                    # scanf format (no field width)
PRINTF_STRING:  .string "String: %s\n"          # format intended for the second printf

.text
    .globl main
    .type main, @function
# main: prints a prompt, calls scanf with the "%s" format, then calls printf
# to echo the result. 32-bit AT&T syntax; arguments are passed on the stack.
main:
    leal 4(%esp), %ecx          # ecx = esp + 4 (the address just above the return address)
    andl $-16, %esp             # round esp down to a multiple of 16
    pushl -4(%ecx)              # push a copy of the word at the original esp (the return address)
    pushl %ebp
    movl %esp, %ebp
    pushl %ecx                  # lands at -4(%ebp); reloaded into ecx before returning
    subl $32, %esp              # reserve 32 bytes below the saved ecx
    pushl $INPUT_STRING         # '$' = push the address of the label
    call printf #printf("Give me a string: ")
    addl $4, %esp               # drop the one 4-byte argument
    pushl -12(%ebp) # char* -- NOTE: this pushes the 4-byte value stored at -12(%ebp), not the address ebp-12
    pushl $SCANF_STRING # "%s"
    call scanf scanf("%s", char*)   # NOTE(review): the text after "call scanf" looks like a comment that lost its '#'
    addl $8, %esp               # drop the two 4-byte arguments
    pushl -12(%ebp)             # again the value stored at -12(%ebp), not its address
    pushl PRINTF_STRING         # NOTE: no '$' here, so this pushes the 4 bytes stored at PRINTF_STRING, not its address
    call printf #printf("String: %s\n")
    addl $16, %esp              # NOTE: adds 16 although the two pushes above only used 8 bytes
    movl -4(%ebp), %ecx         # reload the ecx value saved in the prologue
    xorl %eax, %eax             # return value 0
    leave
    leal -4(%ecx), %esp         # esp = ecx - 4, i.e. the esp value main was entered with
    ret

It prints the output of the first printf correctly, then waits for input (so the scanf call is reached), but as soon as I enter anything I get a segmentation fault.

I know that the char* has to be initialized somehow, but how can I do that at the assembly level?

I am compiling it on Manjaro 64 bit, with gcc -m32

rkhb
  • 14,159
  • 7
  • 32
  • 60
minecraftplayer1234
  • 2,127
  • 4
  • 27
  • 57

1 Answers1

2

GCC's stack-alignment code on entry to main is over-complicated:

leal 4(%esp), %ecx      # ecx = esp + 4 (address just above the return address)
andl $-16, %esp         # round esp down to a 16-byte boundary
pushl -4(%ecx)          # push a copy of the word at the original esp (the return address)
pushl %ebp
movl %esp, %ebp
pushl %ecx              # keep ecx at -4(%ebp) for the epilogue
subl $32, %esp          # reserve 32 bytes of local space
...
leave
leal -4(%ecx), %esp     # esp = ecx - 4; with ecx as computed on entry this is the original esp
ret

Do it like this instead:

pushl %ebp            # Save the caller's frame pointer
movl %esp, %ebp       # ebp = base of this function's frame
subl $32, %esp        # Space for 32 local bytes
andl $-16, %esp       # Alignment by 16
...
leave                 # esp = ebp, then pop ebp: undoes both the sub and the and above
ret

The version of the i386 System V ABI used on modern Linux does guarantee/require 16-byte stack alignment before a call, so you could have re-aligned with 3 pushes (including the push %ebp) instead of an and. Unlike x86-64, most i386 library functions don't get compiled to use movaps or movdqa 16-byte aligned load/store on locals in their stack space, so you can often get away with unaligning the stack like you're doing with PUSHes before scanf. (ESP % 16 == 0 when you call printf the first time, though; that's correct.)


You want to use 12 bytes of the local stack frame for the string. scanf needs the start address of those 12 bytes. The address of that area isn't known at compile time. The operand `-12(%ebp)` in `pushl -12(%ebp)` gives you the value stored at that address, not the address itself. LEA is the instruction that calculates an address without accessing memory. So you have to insert this instruction to get the address at run time and pass it to the C function:

leal -12(%ebp), %eax    # eax = ebp - 12: the address itself, no memory is read
pushl %eax # char*

And this is the working example (minor mistakes also corrected):

# Build: gcc -m32 file.s   (32-bit x86, AT&T syntax, i386 System V ABI, Linux)
# NOTE: the absolute references ($INPUT_STRING etc.) need text relocations in a
# position-independent executable; if your toolchain defaults to PIE and the
# linker complains, build with `gcc -m32 -no-pie`.

.section .rodata                        # the format strings are never written to
INPUT_STRING:   .string "Give me a string: "
SCANF_STRING:   .string "%11s"          # at most 11 characters + terminating NUL = 12 bytes
PRINTF_STRING:  .string "String: %s\n"

.equ FRAME_SIZE, 32                     # bytes of local space reserved below ebp
.equ BUF_OFFSET, -12                    # the 12-byte input buffer is -12(%ebp) .. -1(%ebp)

.text
    .globl main
    .type main, @function
#-----------------------------------------------------------------------
# int main(void)
# Prompts for a word, reads it with scanf("%11s", buf) and echoes it with
# printf("String: %s\n", buf).
# Out:   eax = 0 on success, 1 if scanf did not read a string (e.g. EOF)
# Clobb: eax, ecx, edx (caller-saved; may be changed by the libc calls)
# Stack: locals at the top of the frame (the buffer), outgoing call
#        arguments in the lowest 8 bytes of the frame at 0(%esp) / 4(%esp).
#        esp is not modified between the prologue and `leave`, so it stays
#        16-byte aligned at every call.
#-----------------------------------------------------------------------
main:
    pushl %ebp
    movl %esp, %ebp
    subl $FRAME_SIZE, %esp              # space for locals and outgoing arguments
    andl $-16, %esp                     # 16-byte alignment required before each call

    movl $INPUT_STRING, (%esp)
    call printf                         # printf("Give me a string: ")

    # The callee owns its argument slots, so both slots are rewritten
    # before every call rather than being reused.
    leal BUF_OFFSET(%ebp), %eax         # eax = address of the buffer
    movl %eax, 4(%esp)                  # char*
    movl $SCANF_STRING, (%esp)          # "%11s"
    call scanf                          # scanf("%11s", buf)
    cmpl $1, %eax                       # scanf returns the number of items stored
    jne .Lno_input                      # nothing read: the buffer is uninitialised, do not print it

    leal BUF_OFFSET(%ebp), %eax
    movl %eax, 4(%esp)                  # char*
    movl $PRINTF_STRING, (%esp)         # '$' = address of the label, not its contents
    call printf                         # printf("String: %s\n", buf)

    xorl %eax, %eax                     # return 0
    leave                               # restores esp and ebp, undoing sub + and
    ret

.Lno_input:
    movl $1, %eax                       # return 1: no input could be read
    leave
    ret

    .size main, .-main
    .section .note.GNU-stack,"",@progbits   # mark the stack as non-executable
Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
rkhb
  • 14,159
  • 7
  • 32
  • 60
  • But then, you put up a `char*` on `-12(%ebp)` and what if it has a really big length? Where does it store all the chars? – minecraftplayer1234 Apr 23 '17 at 20:29
  • @Frynio: In the stack. It's a part of "subl $32, %esp". For a really big or variable length you would use `malloc` like in C. If you're using the C library - think like a C programmer :-) – rkhb Apr 23 '17 at 20:34
  • Yes, I know. But then what's the limit for the number of characters? – minecraftplayer1234 Apr 23 '17 at 20:37
  • 1
    The stack sets the limit. The last time I overflowed it, the default stack size was 1 MB on Windows and 8 MB on Linux. Your personal limit is `-12(%ebp)` = 11 characters + terminating null. Because the stack frame is not used for other variables in your example, you can increase it up to `-32(%ebp)` = 31 characters + terminator. If you want 999 characters and one null, use `subl $1000, %esp` and `-1000(%ebp)`. – rkhb Apr 23 '17 at 20:45
  • The `leal 4(%esp), %ecx` stuff is from an older GCC version, duplicating a whole stack frame including a return address. [Trying to understand gcc's complicated stack-alignment at the top of main that copies the return address](https://stackoverflow.com/q/1147623) . It's not necessary at all with a modern glibc (CRT) and kernel that preserve the incoming 16-byte stack alignment from `_start`, but `gcc -m32` is conservative about `main` even when targeting modern Linux. Stack alignment by 16 *is* needed in the version of the i386 System V ABI used on Linux, though. – Peter Cordes Sep 11 '22 at 15:49