1

I tried this code :

#include <stdlib.h>
#include <stdio.h>

/*
 * Rescale the two ints stored at a[0] and a[1] in place.
 *
 * Each element is converted to float and divided (a[0] by 42, a[1] by 66);
 * both quotients are computed before anything is written back.  The cast
 * back to int discards the fractional part (truncation toward zero).
 *
 * The function carries the GCC always_inline attribute but is deliberately
 * declared without `inline` or `static`, so it keeps external linkage.
 */
__attribute__((always_inline)) void dummy_fct(int *a){
  const float first_quotient = (float)a[0] / 42;
  const float second_quotient = (float)a[1] / 66;

  a[0] = (int)first_quotient;
  a[1] = (int)second_quotient;
}

/*
 * Driver: fills a two-element array from rand(), passes it through
 * dummy_fct(), and prints the two resulting values on one line.
 */
int main(void){
  /* rand() is never seeded here; the values only need to be unknown at
     compile time so the call below cannot be folded away. */
  int samples[2] = {rand(),rand()};

  dummy_fct(samples);
  printf("%d %d\n",samples[0],samples[1]);

  return 0;
}

When I compile it with gcc and no options, I get this warning (which is logical):

 main.c:4:37: warning: always_inline function might not be inlinable [-Wattributes]
 __attribute__((always_inline)) void dummy_fct(int *a){

But when I read the generated assembly of my binary (using objdump -d my_binary) I get this :

(...)
0000000000400550 <frame_dummy>:
  400550:   55                      push   %rbp
  400551:   48 89 e5                mov    %rsp,%rbp
  400554:   5d                      pop    %rbp
  400555:   e9 36 ff ff ff          jmpq   400490 <register_tm_clones>
  40055a:   90                      nop
  40055b:   90                      nop

000000000040055c <dummy_fct>:
  40055c:   55                      push   %rbp
  40055d:   48 89 e5                mov    %rsp,%rbp
  400560:   48 89 7d e8             mov    %rdi,-0x18(%rbp)
  400564:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  400568:   8b 00                   mov    (%rax),%eax
  40056a:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  40056e:   f3 0f 10 0d ea 01 00    movss  0x1ea(%rip),%xmm1        # 400760 <_IO_stdin_used+0xc>
  400575:   00 
  400576:   f3 0f 5e c1             divss  %xmm1,%xmm0
  40057a:   f3 0f 11 45 f8          movss  %xmm0,-0x8(%rbp)
  40057f:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  400583:   48 83 c0 04             add    $0x4,%rax
  400587:   8b 00                   mov    (%rax),%eax
  400589:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  40058d:   f3 0f 10 0d cf 01 00    movss  0x1cf(%rip),%xmm1        # 400764 <_IO_stdin_used+0x10>
  400594:   00 
  400595:   f3 0f 5e c1             divss  %xmm1,%xmm0
  400599:   f3 0f 11 45 fc          movss  %xmm0,-0x4(%rbp)
  40059e:   f3 0f 10 45 f8          movss  -0x8(%rbp),%xmm0
  4005a3:   f3 0f 2c d0             cvttss2si %xmm0,%edx
  4005a7:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  4005ab:   89 10                   mov    %edx,(%rax)
  4005ad:   f3 0f 10 45 fc          movss  -0x4(%rbp),%xmm0
  4005b2:   48 8b 45 e8             mov    -0x18(%rbp),%rax
  4005b6:   48 8d 50 04             lea    0x4(%rax),%rdx
  4005ba:   f3 0f 2c c0             cvttss2si %xmm0,%eax
  4005be:   89 02                   mov    %eax,(%rdx)
  4005c0:   90                      nop
  4005c1:   5d                      pop    %rbp
  4005c2:   c3                      retq   

00000000004005c3 <main>:
  4005c3:   55                      push   %rbp
  4005c4:   48 89 e5                mov    %rsp,%rbp
  4005c7:   48 83 ec 20             sub    $0x20,%rsp
  4005cb:   e8 28 fe ff ff          callq  4003f8 <rand@plt>
  4005d0:   89 45 f0                mov    %eax,-0x10(%rbp)
  4005d3:   e8 20 fe ff ff          callq  4003f8 <rand@plt>
  4005d8:   89 45 f4                mov    %eax,-0xc(%rbp)
  4005db:   48 8d 45 f0             lea    -0x10(%rbp),%rax
  4005df:   48 89 45 f8             mov    %rax,-0x8(%rbp)
  4005e3:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  4005e7:   8b 00                   mov    (%rax),%eax
  4005e9:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  4005ed:   f3 0f 10 0d 6b 01 00    movss  0x16b(%rip),%xmm1        # 400760 <_IO_stdin_used+0xc>
  4005f4:   00 
  4005f5:   f3 0f 5e c1             divss  %xmm1,%xmm0
  4005f9:   f3 0f 11 45 e8          movss  %xmm0,-0x18(%rbp)
  4005fe:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  400602:   48 83 c0 04             add    $0x4,%rax
  400606:   8b 00                   mov    (%rax),%eax
  400608:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  40060c:   f3 0f 10 0d 50 01 00    movss  0x150(%rip),%xmm1        # 400764 <_IO_stdin_used+0x10>
  400613:   00 
  400614:   f3 0f 5e c1             divss  %xmm1,%xmm0
  400618:   f3 0f 11 45 ec          movss  %xmm0,-0x14(%rbp)
  40061d:   f3 0f 10 45 e8          movss  -0x18(%rbp),%xmm0
  400622:   f3 0f 2c d0             cvttss2si %xmm0,%edx
  400626:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  40062a:   89 10                   mov    %edx,(%rax)
  40062c:   f3 0f 10 45 ec          movss  -0x14(%rbp),%xmm0
  400631:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  400635:   48 8d 50 04             lea    0x4(%rax),%rdx
  400639:   f3 0f 2c c0             cvttss2si %xmm0,%eax
  40063d:   89 02                   mov    %eax,(%rdx)
  40063f:   8b 55 f4                mov    -0xc(%rbp),%edx
  400642:   8b 45 f0                mov    -0x10(%rbp),%eax
  400645:   89 c6                   mov    %eax,%esi
  400647:   bf 58 07 40 00          mov    $0x400758,%edi
  40064c:   b8 00 00 00 00          mov    $0x0,%eax
  400651:   e8 82 fd ff ff          callq  4003d8 <printf@plt>
  400656:   b8 00 00 00 00          mov    $0x0,%eax
  40065b:   c9                      leaveq 
  40065c:   c3                      retq   
  40065d:   90                      nop
  40065e:   90                      nop
  40065f:   90                      nop
(...)

Of course, there is no callq to dummy_fct, but I am very surprised to see that the code of the function is not in the main, and is present in the binary in a separate section.

I mean that inlining already adds some code, so the binary is supposed to be larger (if the inlined code is used several times). So why is the code of the function still present outside main? That makes the binary even larger... I don't really get this point about inlining in gcc.

(This is more a philosophical question, there is no more point here)

Welgriv
  • 714
  • 9
  • 24
  • 2
    how about adding `static` to the function to make it local to the file? Because your example makes the function public, and the compiler can't tell whether other object file will try to call it, and linker will need then stand-alone variant. Although the linker, when linking final binary, should be able to tell there's no such usage, and may remove such function, depends what linker you use, and what optimization options. `gcc` with no options is building debug builds AFAIK, usually not worth of reasoning about machine code produced by that, except checking correctness of code. – Ped7g May 18 '18 at 09:18
  • 1
    hm, `static` will not remove the warning (only the code from binary), not sure what that warning means and how to avoid it (I'm not C expert, so my comments are from assembly point of view). – Ped7g May 18 '18 at 09:21
  • 1
    it there a reason why you declare the function not as `inline`? – Kami Kaze May 18 '18 at 10:07
  • The warning happens even at `-O3` (https://godbolt.org/g/ymEpPH). That's unsurprising because of what the answer is, though. – Peter Cordes May 18 '18 at 11:12
  • In some cases, I could see no inlining using only the `inline` keyword; see this [answer](https://stackoverflow.com/a/13228348/7758765). The real problem with this warning and inlining is that I don't really know when gcc actually inlines the function or not, which could be quite embarrassing. – Welgriv May 18 '18 at 11:57
  • @Welgriv Why is it so important for you that the function is inlined under any circumstance? Don't you trust the judgement of the compiler to make the right call? – fuz May 18 '18 at 12:10
  • @fuz it is just a tool issue. I want my function inlined when I explicitly inline it. And I am confident in the fact that gcc can do this. Otherwise I just put the `-Ofast` option and problem solve... – Welgriv May 18 '18 at 12:13
  • @Welgriv You should think of inlining as nothing but an optimization. Unless there are special reasons (such as your code inspecting its own stack frame), there is little point in forcing the compiler to inline your function. The compiler has very good heuristics to decide when to inline a function. Going around them by forcing an inline where the compiler might not decide to likely makes the compiler generate worse code than usual. Also note that `-Ofast` is dangerous and can destroy your floating point code. I advice you not to use it. – fuz May 18 '18 at 12:16
  • @fuz ok, you really want know what I am doing !! Are you a spy ? ;) (it's a joke) in fact I work in a embedded environment and I really want exactly know what my assembly code will look like. The thing is that I can save some cycles (and then power) if my functions are inline but I also want the memory size to be the smallest as possible (cause memory consume also power and space). So I have to check if the cycles saved inlining the functions are worth regarding the program memory size. You can understand that I don't want to re-write my functions as macros. – Welgriv May 18 '18 at 12:49
  • @Welgriv Well that's a point. Make sure to compile with `-Os`, too, or the results are rather meaningless. Unoptimized code is weird and occupies a lot of space. – fuz May 18 '18 at 13:16
  • After verification, I can't use `-Ofast` and `-Os` at the same time (only the last flag is taken into account). So what do you suggest exactly? – Welgriv May 23 '18 at 14:36

2 Answers

3

As you have not declared otherwise, dummy_fct is an external function and thus the compiler is obliged to generate code for it in case callers from other translation units exist. To remove this obligation, declare dummy_fct to be static.

Note further that dummy_fct was actually inlined. You can see its machine code here inside main:

  4005db:   48 8d 45 f0             lea    -0x10(%rbp),%rax
  4005df:   48 89 45 f8             mov    %rax,-0x8(%rbp)
  4005e3:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  4005e7:   8b 00                   mov    (%rax),%eax
  4005e9:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  4005ed:   f3 0f 10 0d 6b 01 00    movss  0x16b(%rip),%xmm1        # 400760 <_IO_stdin_used+0xc>
  4005f4:   00 
  4005f5:   f3 0f 5e c1             divss  %xmm1,%xmm0
  4005f9:   f3 0f 11 45 e8          movss  %xmm0,-0x18(%rbp)
  4005fe:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  400602:   48 83 c0 04             add    $0x4,%rax
  400606:   8b 00                   mov    (%rax),%eax
  400608:   f3 0f 2a c0             cvtsi2ss %eax,%xmm0
  40060c:   f3 0f 10 0d 50 01 00    movss  0x150(%rip),%xmm1        # 400764 <_IO_stdin_used+0x10>
  400613:   00 
  400614:   f3 0f 5e c1             divss  %xmm1,%xmm0
  400618:   f3 0f 11 45 ec          movss  %xmm0,-0x14(%rbp)
  40061d:   f3 0f 10 45 e8          movss  -0x18(%rbp),%xmm0
  400622:   f3 0f 2c d0             cvttss2si %xmm0,%edx
  400626:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  40062a:   89 10                   mov    %edx,(%rax)
  40062c:   f3 0f 10 45 ec          movss  -0x14(%rbp),%xmm0
  400631:   48 8b 45 f8             mov    -0x8(%rbp),%rax
  400635:   48 8d 50 04             lea    0x4(%rax),%rdx
  400639:   f3 0f 2c c0             cvttss2si %xmm0,%eax
  40063d:   89 02                   mov    %eax,(%rdx)

While it might be a bit hard to read this code due to the lack of optimizations, careful reading confirms that this is indeed what dummy_fct does.

fuz
  • 88,405
  • 25
  • 200
  • 352
3

You forgot the magical word inline: inline __attribute__((always_inline)) != __attribute__((always_inline))

BTW same additional code may be seen sometimes even if you add the inline. That code actually will not be linked and it is used by the LTO.

As there is some confusion about it. Compiler may inline function even if you do not use the inline keyword

https://godbolt.org/g/J98BgZ

__attribute__((always_inline))

shows the compiler that it should inline even if normally it would not (for example -O0).

Adding the C keyword inline means the function may be inlined; combined with the attribute, it is always inlined.

0___________
  • 60,014
  • 4
  • 34
  • 74
  • I read in this [link](https://stackoverflow.com/a/13228348/7758765) answer that `inline` keyword is not needed when `__attribute__((always_inline))` is used – Welgriv May 18 '18 at 11:54
  • And the function code is still there when I use the `-fno-lto` option. – Welgriv May 18 '18 at 12:00
  • @Welgriv `inline` is a C keyword. __attribute__.... is a compiler directive. Answer yourself if writing in the C requires the C keywords and why the compiled code looks like in your case. Not everything found in the net is worth reading :) – 0___________ May 18 '18 at 12:58
  • @Welgriv see my amended answer - especially the godbolt link. – 0___________ May 18 '18 at 13:05