0

Following this thread ...

For this piece of code:

#include <stdio.h>

/*
 * Prints i and the same value converted to size_t for i = 0..9.
 * This is the program whose generated assembly is examined below.
 */
int main(void)
{
    int i;
    size_t u;

    for (i = 0; i < 10; i++) {
        /* the int -> size_t conversion under discussion */
        u = (size_t)i;
        printf("i = %d, u = %zu\n", i, u);
    }
    return 0;
}

The output in assembly is:

EDIT: Compiled with -O2

    # GCC 4.7.2 output for main() in demo.c, built with -O2 (x86-64, AT&T syntax).
    # %rbx is the single loop counter: its low half %ebx is passed as i and the
    # full register is passed as u, so no separate conversion instruction appears.
    .file   "demo.c"
    .section    .rodata.str1.1,"aMS",@progbits,1
.LC0:
    .string "i = %d, u = %zu\n"
    .section    .text.startup,"ax",@progbits
    .p2align 4,,15
    .globl  main
    .type   main, @function
main:
.LFB3:
    .cfi_startproc
    # %rbx is callee-saved, so it is preserved before being used as the counter.
    pushq   %rbx
    .cfi_def_cfa_offset 16
    .cfi_offset 3, -16
    # counter = 0 (a 32-bit write also clears the upper half of %rbx)
    xorl    %ebx, %ebx
    .p2align 4,,10
    .p2align 3
.L2:
    # 3rd argument: u, the full 64-bit counter
    movq    %rbx, %rdx
    # 2nd argument: i, the low 32 bits of the counter
    movl    %ebx, %esi
    # %al = 0: no vector registers are used by this variadic call
    xorl    %eax, %eax
    # 1st argument: address of the format string
    movl    $.LC0, %edi
    # increment the counter before the call; %rbx survives printf
    addq    $1, %rbx
    call    printf
    # loop until the counter reaches 10
    cmpq    $10, %rbx
    jne .L2
    # return value 0
    xorl    %eax, %eax
    popq    %rbx
    .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE3:
    .size   main, .-main
    .ident  "GCC: (Debian 4.7.2-5) 4.7.2"
    .section    .note.GNU-stack,"",@progbits

Is the conversion u = (size_t)i; consuming extra cycles?

Community
  • 1
  • 1
David Ranieri
  • 39,972
  • 7
  • 52
  • 94

3 Answers

5

Yes, as the code is posted, certainly. Your conversion is here:

# load the 32-bit value from its stack slot
movl    -4(%rbp), %eax
# sign-extend %eax into %rax (32 -> 64 bits)
cltq
# store the 64-bit result to a second stack slot
movq    %rax, -16(%rbp)

Of course, this code is unoptimized, so it's not a very fair comparison. If you compile it with optimization, the compiler may realize that the values are always non-negative and just do a single move from whatever register holds i to %rdx, which holds the third argument.

Edit:

As suspected, there is essentially no overhead in the optimized code. In this case, the compiler has converted the loop to count up u, and derive i from u instead of the other way around, so %rbx is used for the loop, and the value of i is just using %ebx, which is the lower 32 bits of %rbx - so there is no overhead in this example. I emphasise this, since there may well be other cases where converting from int to size_t will have a penalty. It completely depends on the circumstances.

Mats Petersson
  • 126,704
  • 14
  • 140
  • 227
4

Yes, it does, as it changes the internal representation from 32-bit to 64-bit. Specifically,

# Excerpt: a 32-bit load, sign-extension to 64 bits, a store of the result
# to the stack, and a reload of that result into %rdx.
.L3:
    movl    -4(%rbp), %eax
    # sign-extend %eax into %rax
    cltq
    movq    %rax, -16(%rbp)
    # the value just stored is read straight back from the stack
    movq    -16(%rbp), %rdx

This reads i, performs sign-extension, and copies the result to %rdx. I'm unsure why this value has to pass through the stack; as Mats pointed out, this looks like code from a non-optimizing compiler run.

EDIT

In the optimized assembly code, the loop counter is maintained as the wider data type. AFAIR, movs between registers don't differ in run-time cycles between quad and dword operands (indeed they don't: see table C-16 in Intel's pertinent documentation, referenced by this SO post).

Community
  • 1
  • 1
collapsar
  • 17,010
  • 4
  • 35
  • 61
2

I'm not sure whether the conversion is what is consuming cycles for you; I believe it is the assignment itself that consumes the cycles.

For example, look at this t1.c:

#include <stdio.h>

/*
 * t1.c: baseline variant with no assignment inside the loop.
 * NOTE(review): u is never assigned, so the value printed for it is
 * indeterminate.
 */
int main(void)
{
    int i;
    size_t u;

    for (i = 0; i < 10; i++) {
        printf("i = %d, u = %zu\n", i, u);
    }
    return 0;
}

And the assembly for t1.c:

        # GCC 4.4.6 output for main() in t1.c (32-bit x86, AT&T syntax).
        # Stack slots: 24(%esp) holds i, 28(%esp) holds u. Nothing in this
        # listing stores to 28(%esp), matching the unassigned u in t1.c.
        .file   "t1.c"
        .section        .rodata
.LC0:
        .string "i = %d, u = %zu\n"
        .text
.globl main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        # align the stack to 16 bytes, then reserve 32 bytes
        andl    $-16, %esp
        subl    $32, %esp
        # i = 0
        movl    $0, 24(%esp)
        jmp     .L2
.L3:
        # printf arguments are written to the stack:
        # (%esp) = format string, 4(%esp) = i, 8(%esp) = u
        movl    $.LC0, %eax
        movl    28(%esp), %edx
        movl    %edx, 8(%esp)
        movl    24(%esp), %edx
        movl    %edx, 4(%esp)
        movl    %eax, (%esp)
        call    printf
        # i++
        addl    $1, 24(%esp)
.L2:
        # loop while i <= 9
        cmpl    $9, 24(%esp)
        jle     .L3
        # return 0
        movl    $0, %eax
        leave
        ret
        .size   main, .-main
        .ident  "GCC: (GNU) 4.4.6 20110731 (Red Hat 4.4.6-3)"
        .section        .note.GNU-stack,"",@progbits

In the above case there is no assignment at all, so it's fine for now.

second case t2.c

#include <stdio.h>

/*
 * t2.c: variant that assigns a size_t value to the int loop variable.
 * NOTE(review): u is never assigned, and the assignment overwrites the
 * loop counter i on every iteration.
 */
int main(void)
{
    int i;
    size_t u;

    for (i = 0; i < 10; i++) {
        /* u is already a size_t; the narrowing to int happens in the assignment */
        i = (size_t) u;
        printf("i = %d, u = %zu\n", i, u);
    }
    return 0;
}

And the subsequent assembly:

        # GCC 4.4.6 output for main() in t2.c (32-bit x86, AT&T syntax).
        # Stack slots: 24(%esp) holds i, 28(%esp) holds u.
        .file   "t2.c"
        .section        .rodata
.LC0:
        .string "i = %d, u = %zu\n"
        .text
.globl main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        # align the stack to 16 bytes, then reserve 32 bytes
        andl    $-16, %esp
        subl    $32, %esp
        # i = 0
        movl    $0, 24(%esp)
        jmp     .L2
.L3:
        # i = u: a plain 32-bit copy from 28(%esp) to 24(%esp) through %eax
        movl    28(%esp), %eax
        movl    %eax, 24(%esp)
        # printf arguments are written to the stack:
        # (%esp) = format string, 4(%esp) = i, 8(%esp) = u
        movl    $.LC0, %eax
        movl    28(%esp), %edx
        movl    %edx, 8(%esp)
        movl    24(%esp), %edx
        movl    %edx, 4(%esp)
        movl    %eax, (%esp)
        call    printf
        # i++
        addl    $1, 24(%esp)
.L2:
        # loop while i <= 9
        cmpl    $9, 24(%esp)
        jle     .L3
        # return 0
        movl    $0, %eax
        leave
        ret
        .size   main, .-main
        .ident  "GCC: (GNU) 4.4.6 20110731 (Red Hat 4.4.6-3)"
        .section        .note.GNU-stack,"",@progbits

Check the statements above

# load the 32-bit value at 28(%esp) into %eax, then store it to 24(%esp)
movl    28(%esp), %eax
movl    %eax, 24(%esp)

now for the last example t3.c

#include <stdio.h>

/*
 * t3.c: same loop as the previous variant, but u is declared int, so the
 * assignment involves no type conversion.
 * NOTE(review): u is never assigned, the assignment overwrites the loop
 * counter i, and the format string uses %zu for an int argument.
 */
int main(void)
{
    int i;
    int u;

    for (i = 0; i < 10; i++) {
        /* int-to-int copy */
        i = u;
        printf("i = %d, u = %zu\n", i, u);
    }
    return 0;
}

and the subsequent assembly

        # GCC 4.4.6 output for main() in t3.c (32-bit x86, AT&T syntax).
        # Stack slots: 24(%esp) holds i, 28(%esp) holds u.
        .file   "t3.c"
        .section        .rodata
.LC0:
        .string "i = %d, u = %zu\n"
        .text
.globl main
        .type   main, @function
main:
        pushl   %ebp
        movl    %esp, %ebp
        # align the stack to 16 bytes, then reserve 32 bytes
        andl    $-16, %esp
        subl    $32, %esp
        # i = 0
        movl    $0, 24(%esp)
        jmp     .L2
.L3:
        # i = u: a plain 32-bit copy from 28(%esp) to 24(%esp) through %eax
        movl    28(%esp), %eax
        movl    %eax, 24(%esp)
        # printf arguments are written to the stack:
        # (%esp) = format string, 4(%esp) = i, 8(%esp) = u
        movl    $.LC0, %eax
        movl    28(%esp), %edx
        movl    %edx, 8(%esp)
        movl    24(%esp), %edx
        movl    %edx, 4(%esp)
        movl    %eax, (%esp)
        call    printf
        # i++
        addl    $1, 24(%esp)
.L2:
        # loop while i <= 9
        cmpl    $9, 24(%esp)
        jle     .L3
        # return 0
        movl    $0, %eax
        leave
        ret
        .size   main, .-main
        .ident  "GCC: (GNU) 4.4.6 20110731 (Red Hat 4.4.6-3)"
        .section        .note.GNU-stack,"",@progbits

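Now you can compare the listings for t2 and t3. On this 32-bit target they are identical apart from the .file name, because the size_t-to-int assignment compiles to the same plain 32-bit copy as the int-to-int one. The result really varies from architecture to architecture, though.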

asio_guy
  • 3,667
  • 2
  • 19
  • 35