8

I got motivated from tail call optimization question What Is Tail Call Optimization?

So, I decided to see how can I do it in plain C.

So, I wrote the 2 factorial programs, 1st where tail call optimization can be applied. I call this fact function as fact(n, 1).

unsigned long long int fact(int n, int cont)
{
      if(n == 0)
            return cont;

      else return fact(n-1, n * cont);
}

2nd is the normal recursion where multiple stack frames are required.

unsigned long long int fact(int n)
{
    if(n == 0)
        return 1;

    else return n * fact(n-1);
}

This is the assembly generated by a 32 bit compiler for the former with -O2

0x8048470 <fact>:   push   %ebp
0x8048471 <fact+1>: mov    %esp,%ebp
0x8048473 <fact+3>: mov    0x8(%ebp),%edx
0x8048476 <fact+6>: mov    0xc(%ebp),%eax
0x8048479 <fact+9>: test   %edx,%edx
0x804847b <fact+11>:    je     0x8048488 <fact+24>
0x804847d <fact+13>:    lea    0x0(%esi),%esi
0x8048480 <fact+16>:    imul   %edx,%eax
0x8048483 <fact+19>:    sub    $0x1,%edx
0x8048486 <fact+22>:    jne    0x8048480 <fact+16>
0x8048488 <fact+24>:    mov    %eax,%edx
0x804848a <fact+26>:    sar    $0x1f,%edx
0x804848d <fact+29>:    pop    %ebp
0x804848e <fact+30>:    ret    

This is the assembly generated by a 32 bit compiler for latter with -O2.

0x8048470 <fact>:   push   %ebp
0x8048471 <fact+1>: mov    %esp,%ebp
0x8048473 <fact+3>: push   %edi
0x8048474 <fact+4>: push   %esi
0x8048475 <fact+5>: push   %ebx
0x8048476 <fact+6>: sub    $0x14,%esp
0x8048479 <fact+9>: mov    0x8(%ebp),%eax
0x804847c <fact+12>:    movl   $0x1,-0x18(%ebp)
0x8048483 <fact+19>:    movl   $0x0,-0x14(%ebp)
0x804848a <fact+26>:    test   %eax,%eax
0x804848c <fact+28>:    je     0x80484fc <fact+140>
0x804848e <fact+30>:    mov    %eax,%ecx
0x8048490 <fact+32>:    mov    %eax,%esi
0x8048492 <fact+34>:    sar    $0x1f,%ecx
0x8048495 <fact+37>:    add    $0xffffffff,%esi
0x8048498 <fact+40>:    mov    %ecx,%edi
0x804849a <fact+42>:    mov    %eax,%edx
0x804849c <fact+44>:    adc    $0xffffffff,%edi
0x804849f <fact+47>:    sub    $0x1,%eax
0x80484a2 <fact+50>:    mov    %eax,-0x18(%ebp)
0x80484a5 <fact+53>:    movl   $0x0,-0x14(%ebp)
0x80484ac <fact+60>:    sub    -0x18(%ebp),%esi
0x80484af <fact+63>:    mov    %edx,-0x20(%ebp)
0x80484b2 <fact+66>:    sbb    -0x14(%ebp),%edi
0x80484b5 <fact+69>:    movl   $0x1,-0x18(%ebp)
0x80484bc <fact+76>:    movl   $0x0,-0x14(%ebp)
0x80484c3 <fact+83>:    mov    %ecx,-0x1c(%ebp)
0x80484c6 <fact+86>:    xchg   %ax,%ax
0x80484c8 <fact+88>:    mov    -0x14(%ebp),%ecx
0x80484cb <fact+91>:    mov    -0x18(%ebp),%ebx
0x80484ce <fact+94>:    imul   -0x1c(%ebp),%ebx
0x80484d2 <fact+98>:    imul   -0x20(%ebp),%ecx
0x80484d6 <fact+102>:   mov    -0x18(%ebp),%eax
0x80484d9 <fact+105>:   mull   -0x20(%ebp)
0x80484dc <fact+108>:   add    %ebx,%ecx
0x80484de <fact+110>:   add    %ecx,%edx
0x80484e0 <fact+112>:   addl   $0xffffffff,-0x20(%ebp)
0x80484e4 <fact+116>:   adcl   $0xffffffff,-0x1c(%ebp)
0x80484e8 <fact+120>:   mov    -0x1c(%ebp),%ebx
0x80484eb <fact+123>:   mov    %eax,-0x18(%ebp)
0x80484ee <fact+126>:   mov    -0x20(%ebp),%eax
0x80484f1 <fact+129>:   mov    %edx,-0x14(%ebp)
0x80484f4 <fact+132>:   xor    %edi,%ebx
0x80484f6 <fact+134>:   xor    %esi,%eax
0x80484f8 <fact+136>:   or     %eax,%ebx
0x80484fa <fact+138>:   jne    0x80484c8 <fact+88>
0x80484fc <fact+140>:   mov    -0x18(%ebp),%eax
0x80484ff <fact+143>:   mov    -0x14(%ebp),%edx
0x8048502 <fact+146>:   add    $0x14,%esp
0x8048505 <fact+149>:   pop    %ebx
0x8048506 <fact+150>:   pop    %esi
0x8048507 <fact+151>:   pop    %edi
0x8048508 <fact+152>:   pop    %ebp
0x8048509 <fact+153>:   ret    

Compiling both the programs and looking at the assembly generated, both the programs still have recursive calls. But, when I compile with -O2 option(assembly posted above) in the former, I see no recursive calls at all and so I think gcc does tail call optimization stuff.

But when I compile the latter with -O2 option, it also removes the recursive calls and instead puts quite a large number of assembly instructions as compared to what happens to former on -O2.

I wanted to understand precisely what is the compiler doing in the latter, and why couldnt it transform to the assembly generated by former even with O4.

Community
  • 1
  • 1
0xhacker
  • 1,091
  • 2
  • 14
  • 26
  • 1
    You should include the generated assembly in your question (the relevant parts), that would help answerers comment on what is happening. – Pascal Cuoq Mar 22 '12 at 14:42

3 Answers3

5

Program 2 does long long calculations, progtlram 1 doesn't.

n. m. could be an AI
  • 112,515
  • 14
  • 128
  • 243
4

The difference is that the first version uses an int variable for the calculation, which is then extended to unsigned long long at the end, while the latter uses an unsigned long long all the way.

Simon Richter
  • 28,572
  • 1
  • 42
  • 64
0

The compiler appears to have optimised the recursive calls into loops. Note that your C code only has forward branches (if-then-else), but the assembler has backward branches (loops).

If you really want to see a tail-call optimization in action, have it call a different function. Of course, that's not recursion, but the compiler is too smart for small test-cases like this.

ams
  • 24,923
  • 4
  • 54
  • 75