3

I implemented the following tail-call recursive fibonacci function to try out gcc's Tail Call Optimization (as mentioned here):

#include <stdint.h>
uint64_t fib_tc( uint8_t n) {
  uint64_t fib_helper(uint8_t n, uint64_t acc, uint64_t prev) {
    if(n == 0) 
      return acc;
    else
      return fib_helper( n-1, acc+prev, acc);
  }
  fib_helper(n,1,0);
}

Initially, I did not specify a -O level (so it defaults to 1). The function worked as expected with -O1. Later, I tried compiling with -O2, but that broke the function - it always returns 0. Same result when using -O3.

I took a look at the assembly output with -O2 (using: gcc -O2 -std=gnu99 -S fib_tc.c ) and see the following assembly:

    .file   "fib_tc.c"
    .text
    .p2align 4,,15
    .type   fib_helper.1752, @function
fib_helper.1752:
.LFB1:
    .cfi_startproc
    testb   %dil, %dil
    movq    %rsi, %rax
    jne .L4
    rep ret
    .p2align 4,,10
    .p2align 3
.L4:
    leaq    (%rsi,%rdx), %rsi
    subl    $1, %edi
    movq    %rax, %rdx
    movzbl  %dil, %edi
    jmp fib_helper.1752
    .cfi_endproc
.LFE1:
    .size   fib_helper.1752, .-fib_helper.1752
    .p2align 4,,15
    .globl  fib_tc
    .type   fib_tc, @function
fib_tc:
.LFB0:
    .cfi_startproc
    testb   %dil, %dil
    jne .L11
    ret
    .p2align 4,,10
    .p2align 3
.L11:
    subl    $1, %edi
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movl    $1, %edx
    movzbl  %dil, %edi
    movl    $1, %esi
    call    fib_helper.1752
    addq    $8, %rsp
   .cfi_def_cfa_offset 8
    ret
    .cfi_endproc
.LFE0:
    .size   fib_tc, .-fib_tc
    .ident  "GCC: (GNU) 4.8.2 20131212 (Red Hat 4.8.2-7)"
    .section    .note.GNU-stack,"",@progbits

I notice that with -O2 it seems to have done the TCO (notice the line above: jmp fib_helper.1752). However, this is the non-working version that always returns 0.

I then generated assembly using -O1 to check the difference ( gcc -O1 -std=gnu99 -S fib_tc.c -o fib_tc1.S ):

    .file   "fib_tc.c"
    .text
    .type   fib_helper.1752, @function
fib_helper.1752:
.LFB1:
    .cfi_startproc
    movq    %rsi, %rax
    testb   %dil, %dil
    je  .L6
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    leaq    (%rsi,%rdx), %rax
    subl    $1, %edi
    movzbl  %dil, %edi
    movq    %rsi, %rdx
    movq    %rax, %rsi
    call    fib_helper.1752
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
.L6:
    rep ret
    .cfi_endproc
.LFE1:
    .size   fib_helper.1752, .-fib_helper.1752
    .globl  fib_tc
    .type   fib_tc, @function
fib_tc:
.LFB0:
    .cfi_startproc
    subq    $8, %rsp
    .cfi_def_cfa_offset 16
    movzbl  %dil, %edi
    movl    $0, %edx
    movl    $1, %esi
    call    fib_helper.1752
    addq    $8, %rsp
    .cfi_def_cfa_offset 8
   ret
   .cfi_endproc
.LFE0:
    .size   fib_tc, .-fib_tc
    .ident  "GCC: (GNU) 4.8.2 20131212 (Red Hat 4.8.2-7)"
   .section .note.GNU-stack,"",@progbits

This version (which works) does not seem to have been tail call optimized (based on the call to fib_helper above).

As shown above I'm using gcc 4.8.2 on Fedora 20 Linux (64 bit).

Before I conclude that there is a bug in gcc related to TCO I wanted to check to make sure I haven't done something wrong in the code or in my analysis.

EDIT: as it was pointed out in the comments that there could be an issue with nested functions, I also tried using separate functions as follows and that always returns 0 as well:

uint64_t fib_helper(uint8_t n, uint64_t acc, uint64_t prev) {
  if(n == 0) 
    return acc;
  else
    return fib_helper( n-1, acc+prev, acc);
}

uint64_t fib_tc( uint8_t n) {
  fib_helper(n,1,0);
}

EDIT2: As pointed out in the comments below, I was missing the return (I guess I've been doing too much OCaml lately :) It should be:

uint64_t fib_tc( uint8_t n) {
  uint64_t fib_helper(uint8_t n, uint64_t acc, uint64_t prev) {
    if(n == 0) 
      return acc;
    else
      return fib_helper( n-1, acc+prev, acc);
  }
  return fib_helper(n,1,0);
}
Community
  • 1
  • 1
aneccodeal
  • 8,531
  • 7
  • 45
  • 74
  • Nested functions are a __massive hack__. Please don't use them. – Bill Lynch Jul 02 '14 at 17:27
  • @sharth: I also tried the same code un-nested with the same result. – aneccodeal Jul 02 '14 at 17:28
  • 3
    Typo? `fib_helper(n,1,0);` -> `return fib_helper(n,1,0);` ... using `-Wall` flags the code with a warning. – Shafik Yaghmour Jul 02 '14 at 17:32
  • @shafik: ah, yes, exactly! That now works with -O2. Both the nested and non-nested version work now. I've been doing too much OCaml programming lately where the last evaluation is returned :) – aneccodeal Jul 02 '14 at 17:34
  • 2
    Seems to just be that you forgot to return a value! I'm surprised it compiled. – Brandon Yates Jul 02 '14 at 17:35
  • 2
    It is just undefined behavior, falling off the end of a value returning function is undefined, see [my answer here](http://stackoverflow.com/a/20614325/1708801). – Shafik Yaghmour Jul 02 '14 at 17:36
  • You claim to see `jmp fib_helper.1752` in your first assembly dump, but I only see `call fib_helper.1752`. Is that what you meant? – Useless Jul 02 '14 at 17:44
  • @Useless look in the .L4 section. – aneccodeal Jul 02 '14 at 17:46
  • Got it - must have been a fit of blindness. So, when I run your original code, I get the same result. When I run the non-nested version, with the `return` added, I get the correct result. – Useless Jul 02 '14 at 18:07
  • @Useless Even the nested one works for me with the final return added, though it has to be noted that it will only compile with gcc (clang is not happy with it and probably no other non-gcc compiler will accept a nested function). – aneccodeal Jul 02 '14 at 18:46

0 Answers0