0

I am working on a codegen problem in which the output is an x86-64 assembly file. I have the following output sequence:

mov -0x8(%rbp), %r10     // r10 = 0 at the moment
mov $0xa, %r11           // r11 = 10
cmp %r11, %r10       
setg -0x10(%rbp)         // -0x10(%rbp) = 0
mov -0x10(%rbp), %r10    // %r10 is desired to be 0, but here is the problem

According to the debugger, r10 is set to 0x100000000 (the debugger shows `r10 0x100000000 4294967296`).

I cannot figure out why the content of %r10 is not set to the expected value 0

Is there any convention that I missed?

The whole generated x86-64 program is:

    # ------------------------------------------------------------------
    # _f1 -- generated code; AT&T syntax (op src, dst).
    # In:   rdi = first argument, spilled to -8(%rbp).
    # Out:  rax = 1 if the 8-byte value loaded from -16(%rbp) equals 1,
    #       otherwise 0.  Apparently intended as "argument > 10" (signed).
    # Uses: r10, r11 as scratch; 16 bytes of stack locals.
    #
    # NOTE(review): setg stores a single byte at -16(%rbp), but the
    # following movq loads 8 bytes from that address.  Bytes
    # -15(%rbp)..-9(%rbp) are never written in this function, so the
    # loaded value (and the result of the cmpq $1) depends on stale
    # stack contents.
    # ------------------------------------------------------------------
    .text
    .globl  _f1
_f1:
    pushq   %rbp
    movq    %rsp, %rbp
    subq    $16, %rsp               # reserve 16 bytes for the two 8-byte slots
    movq    %rdi, -8(%rbp)          # spill the argument to its stack slot
    jmp f1.start
    .text
f1.start:
    movq    -8(%rbp), %r10          # r10 = argument
    movq    $10, %r11               # r11 = 10
    cmpq    %r11, %r10              # flags from r10 - r11
    setg    -16(%rbp)               # writes ONE byte: 1 if r10 > r11 (signed), else 0
    movq    -16(%rbp), %r10         # reads EIGHT bytes: low byte from setg, upper 7 unwritten
    cmpq    $1, %r10                # equal only if all upper 7 bytes happen to be zero
    je  f1.then
    jmp f1.end
    .text
f1.then:
    movq    $1, %rax                # return 1
    movq    %rbp, %rsp
    popq    %rbp
    retq    
    .text
f1.end:
    movq    $0, %rax                # return 0
    movq    %rbp, %rsp
    popq    %rbp
    retq    
    # ------------------------------------------------------------------
    # _f2 -- generated code; AT&T syntax (op src, dst).
    # In:   rdi = first argument, spilled to -8(%rbp).
    # Out:  rax = 1 if the 8-byte value loaded from -16(%rbp) equals 1,
    #       otherwise 0.  Apparently intended as "argument > 10" (signed).
    # Uses: r10, r11 as scratch; 16 bytes of stack locals.
    #
    # NOTE(review): setg stores a single byte at -16(%rbp), but the
    # following movq loads 8 bytes from that address.  Bytes
    # -15(%rbp)..-9(%rbp) are never written in this function, so the
    # loaded value (and the result of the cmpq $1) depends on stale
    # stack contents.
    # ------------------------------------------------------------------
    .text
    .globl  _f2
_f2:
    pushq   %rbp
    movq    %rsp, %rbp
    subq    $16, %rsp               # reserve 16 bytes for the two 8-byte slots
    movq    %rdi, -8(%rbp)          # spill the argument to its stack slot
    jmp f2.start
    .text
f2.start:
    movq    -8(%rbp), %r10          # r10 = argument
    movq    $10, %r11               # r11 = 10
    cmpq    %r11, %r10              # flags from r10 - r11
    setg    -16(%rbp)               # writes ONE byte: 1 if r10 > r11 (signed), else 0
    movq    -16(%rbp), %r10         # reads EIGHT bytes: low byte from setg, upper 7 unwritten
    cmpq    $1, %r10                # equal only if all upper 7 bytes happen to be zero
    je  f2.then
    jmp f2.end
    .text
f2.then:
    movq    $1, %rax                # return 1
    movq    %rbp, %rsp
    popq    %rbp
    retq    
    .text
f2.end:
    movq    $0, %rax                # return 0
    movq    %rbp, %rsp
    popq    %rbp
    retq    
    # ------------------------------------------------------------------
    # _main -- generated code; AT&T syntax (op src, dst).
    # Calls _f1 with rdi = 0 and _f2 with rdi = 15, keeps each result in
    # its own stack slot, and returns their sum in rax.
    # Uses: rdi, rax, r10, r11; 40 bytes of stack locals.
    #
    # NOTE(review): assuming the System V AMD64 convention (rsp 16-byte
    # aligned at every call, so rsp is 8 mod 16 on entry) -- TODO confirm
    # the target ABI.  pushq brings rsp to 0 mod 16 and subq $40 leaves it
    # at 8 mod 16 at both callq sites; a 48-byte frame would keep it aligned.
    # ------------------------------------------------------------------
    .text
    .globl  _main
_main:
    pushq   %rbp
    movq    %rsp, %rbp
    subq    $40, %rsp               # locals at -24, -32 and -40(%rbp)
    movq    $0, %rdi                # argument for _f1
    callq   _f1
    movq    %rax, -24(%rbp)         # save _f1's result
    movq    $15, %rdi               # argument for _f2
    callq   _f2
    movq    %rax, -32(%rbp)         # save _f2's result
    movq    -24(%rbp), %r10
    movq    -32(%rbp), %r11
    addq    %r10, %r11              # r11 = _f1 result + _f2 result
    movq    %r11, -40(%rbp)
    movq    -40(%rbp), %rax         # return the sum
    movq    %rbp, %rsp
    popq    %rbp
    retq    

You can save it as test.s and generate an executable by running the following command: `clang -Wno-override-module -O1 -Wall -fno-asynchronous-unwind-tables -mstackrealign -o test test.s`

JinLing
  • 9
  • 6
  • 3
    `setg` sets a single byte. However, you proceed to read an entire qword. Are you sure this is correct? – fuz Mar 04 '21 at 15:03
  • 1
    Why bother with memory here? `setg %r10b` to set the low byte of R10. Preferably after zeroing the full R10 with `xor %r10d, %r10d` before the compare. Or use `movzbl %r10b, %r10d` afterward to zero-extend to the full reg, although that's worse, [especially within the same register defeating mov-elimination](https://stackoverflow.com/questions/33666617/what-is-the-best-way-to-set-a-register-to-zero-in-x86-assembly-xor-mov-or-and/33668295#33668295). You could have used an immediate compare, like `cmpq $0xa, -0x8(%rbp)`. – Peter Cordes Mar 04 '21 at 15:07
  • 1
    Anyway, your question title doesn't reflect the problem. If you use a debugger to single-step your code, you'll see that `setg` is only writing 1 byte, as per the manual https://www.felixcloutier.com/x86/setcc. This is a really badly / inconveniently designed instruction that unfortunately AMD64 didn't fix. – Peter Cordes Mar 04 '21 at 15:09

1 Answer

1

Answered in comments, but just to give it a full answer:

The setcc conditional set instructions, rather inconveniently, only set a single byte. So setg -16(%rbp) sets the byte at -16(%rbp), and then movq -16(%rbp), %r10 loads the quadword whose low byte is the result of the set, and whose high 7 bytes are whatever garbage happened to previously be in bytes -15(%rbp)..-9(%rbp).

So, you need to generate code that either explicitly zeroes the whole quadword before the set, or else ignores all the other bytes (e.g. by replacing movq -16(%rbp), %r10 with movzbl -16(%rbp), %r10d), or handles the issue some other way.

Nate Eldredge
  • 48,811
  • 6
  • 54
  • 82