See my example here, or the following C++ and matching assembly.
IMPLICIT INLINE
#include <iostream>
// Returns the square of i.
// Declared without `inline` or `static`, so it is an ordinary function with external linkage.
int func(int i)
{
return i * i;
}
// Converts argv[1] and argv[2] to int with atoi and writes the square of each to std::cout
// (no separator is written between the two numbers).
// NOTE: argc is never checked, so argv[1] and argv[2] are read unconditionally.
int main(int argc, char *argv[]) {
auto value = atoi(argv[1]);
std::cout << func(value);
value = atoi(argv[2]);
std::cout << func(value);
// The exit status is always 1.
return 1;
}
results in
# func(int): standalone (out-of-line) copy of func emitted for the non-inline declaration.
# In:  edi = i
# Out: eax = i * i
func(int):
# edi = edi * edi
imul edi, edi
# move the product into the return register
mov eax, edi
ret
# main: converts argv[1] and argv[2] to int, squares each and streams it to std::cout.
# Both func(value) calls appear inlined as `imul eax, eax`; there is no `call func` here.
main:
# rbx is callee-saved; it is used below to keep argv alive across the calls.
push rbx
# rdi = argv[1] (argv arrives in rsi)
mov rdi, QWORD PTR [rsi+8]
mov rbx, rsi
# strtol(argv[1], NULL, 10): the atoi call from the C++ shows up as strtol
mov edx, 10
xor esi, esi
call strtol
# inlined func: eax = value * value
imul eax, eax
# std::cout << eax
mov edi, OFFSET FLAT:std::cout
mov esi, eax
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# rdi = argv[2], followed by the same strtol / square / print sequence
mov rdi, QWORD PTR [rbx+16]
mov edx, 10
xor esi, esi
call strtol
# inlined func again
imul eax, eax
mov edi, OFFSET FLAT:std::cout
mov esi, eax
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# return 1
mov eax, 1
pop rbx
ret
# Static-initialisation routine for this translation unit: constructs std::__ioinit
# and registers its destructor with __cxa_atexit.
# The label embeds _Z4funci, the mangled name of func(int).
# Presumably emitted because of #include <iostream>; nothing in the user code calls it directly.
_GLOBAL__sub_I__Z4funci:
# adjust rsp by 8 so the stack is 16-byte aligned at the call below
sub rsp, 8
# std::ios_base::Init::Init(&std::__ioinit)
mov edi, OFFSET FLAT:std::__ioinit
call std::ios_base::Init::Init()
# __cxa_atexit(&std::ios_base::Init::~Init, &std::__ioinit, &__dso_handle)
mov edx, OFFSET FLAT:__dso_handle
mov esi, OFFSET FLAT:std::__ioinit
mov edi, OFFSET FLAT:std::ios_base::Init::~Init()
add rsp, 8
# tail call: __cxa_atexit returns directly to this routine's caller
jmp __cxa_atexit
EXPLICIT INLINE
#include <iostream>
// Returns the square of i.
// Declared `inline`: this is the only difference from the non-inline version of this function.
inline int func(int i)
{
return i * i;
}
// Converts argv[1] and argv[2] to int with atoi and writes the square of each to std::cout
// (no separator is written between the two numbers).
// NOTE: argc is never checked, so argv[1] and argv[2] are read unconditionally.
int main(int argc, char *argv[]) {
auto value = atoi(argv[1]);
std::cout << func(value);
value = atoi(argv[2]);
std::cout << func(value);
// The exit status is always 1.
return 1;
}
results in
# main: converts argv[1] and argv[2] to int, squares each and streams it to std::cout.
# func is inlined at both call sites as `imul eax, eax`, and this listing has no
# standalone func(int) label.
main:
# rbx is callee-saved; it is used below to keep argv alive across the calls.
push rbx
# rdi = argv[1] (argv arrives in rsi)
mov rdi, QWORD PTR [rsi+8]
mov rbx, rsi
# strtol(argv[1], NULL, 10): the atoi call from the C++ shows up as strtol
mov edx, 10
xor esi, esi
call strtol
# inlined func: eax = value * value
imul eax, eax
# std::cout << eax
mov edi, OFFSET FLAT:std::cout
mov esi, eax
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# rdi = argv[2], followed by the same strtol / square / print sequence
mov rdi, QWORD PTR [rbx+16]
mov edx, 10
xor esi, esi
call strtol
# inlined func again
imul eax, eax
mov edi, OFFSET FLAT:std::cout
mov esi, eax
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# return 1
mov eax, 1
pop rbx
ret
# Static-initialisation routine for this translation unit: constructs std::__ioinit
# and registers its destructor with __cxa_atexit.
# Presumably emitted because of #include <iostream>; nothing in the user code calls it directly.
_GLOBAL__sub_I_main:
# adjust rsp by 8 so the stack is 16-byte aligned at the call below
sub rsp, 8
# std::ios_base::Init::Init(&std::__ioinit)
mov edi, OFFSET FLAT:std::__ioinit
call std::ios_base::Init::Init()
# __cxa_atexit(&std::ios_base::Init::~Init, &std::__ioinit, &__dso_handle)
mov edx, OFFSET FLAT:__dso_handle
mov esi, OFFSET FLAT:std::__ioinit
mov edi, OFFSET FLAT:std::ios_base::Init::~Init()
add rsp, 8
# tail call: __cxa_atexit returns directly to this routine's caller
jmp __cxa_atexit
In the example, if line 5 is commented out (which appears to correspond to the IMPLICIT INLINE version above), the optimiser inlines the function 'func' at the two call sites, but it leaves the assembly of 'func' in the produced binary. However, if 'func' is explicitly declared inline, the function does not exist in the output assembly.
Why does the GCC optimiser leave implicitly inlined functions in the compiled assembly, even though the operations of the inlined function are truly inlined with the calling code?