There's a kernel source file that uses #defines in a very interesting way to define several differently named functions with the same body. This solves the problem of having two different functions to maintain. (I forget which file it was...). My idea is based on the same principle.
The way to use the defines is that you define the inline function in each compilation unit where you need it. To demonstrate the method I'll use a simple function:
int add(int a, int b);
It works like this: you make a function generator #define
in a header file and declare the function prototype of the normal version of the function (the one not inlined).
Then you declare two separate function generators, one for the normal function and one for the inline function. The inline function you declare as static __inline__
. When you need to call the inline function in one of your files, you use the generator define to get the source for it. In all other files, where you only need the normal function, you just include the header with the prototype.
The code was tested on:
Intel(R) Core(TM) i5-3330 CPU @ 3.00GHz
Kernel Version: 3.16.0-49-generic
GCC 4.8.4
Code is worth more than a thousand words, so:
File Hierarchy
+
| Makefile
| add.h
| add.c
| loop.c
| loop2.c
| loop3.c
| loops.h
| main.c
add.h
/*
 * GENERATE_ADD(type, prefix) expands to the complete definition of
 *     type int <prefix>add(int a, int b)
 * The function body is written only here, so every generated variant
 * shares it.
 */
#define GENERATE_ADD(type, prefix) \
type int prefix##add(int a, int b) { return a + b; }
/* Expands to the ordinary external function add() (empty type and prefix). */
#define DEFINE_ADD() GENERATE_ADD(,)
/* Expands to a file-local static inline copy named inline_add(). */
#define DEFINE_INLINE_ADD() GENERATE_ADD(static __inline__, inline_)
/* Prototype of the normal add() for files that only include this header. */
int add(int, int);
This doesn't look nice, but cuts the work of maintaining two different functions. The function is fully defined within the GENERATE_ADD(type,prefix)
macro, so if you ever need to change the function, you change this macro and everything else changes.
Next, DEFINE_ADD()
will be called from add.c
to generate the normal version of add
. DEFINE_INLINE_ADD()
will give you access to a function called inline_add
, which has the same signature as your normal add
function, but it has a different name (the inline_ prefix).
Note: I didn't need __attribute__((always_inline))
when using the -O3
flag - the __inline__
did the job. However, if you don't want to use -O3
, use:
/* Variant of DEFINE_INLINE_ADD() that adds always_inline, so inlining does not rely on -O3. */
#define DEFINE_INLINE_ADD() GENERATE_ADD(static __inline__ __attribute__((always_inline)), inline_)
add.c
#include "add.h"
/* Expands to the definition of add(), matching the prototype in add.h. */
DEFINE_ADD()
Simple call to the DEFINE_ADD()
macro generator. This will define the normal version of the function (the one that won't get inlined).
loop.c
#include <stdio.h>
#include "add.h"
/* Generate this file's private static inline copy, inline_add(). */
DEFINE_INLINE_ADD()
/*
 * Prints inline_add(i + 1, i + 2) for i = 0 .. 99999, one value per line.
 * Returns 0.
 */
int loop(void)
{
register int i;
for (i = 0; i < 100000; i++)
printf("%d\n", inline_add(i + 1, i + 2));
return 0;
}
Here in loop.c
you can see the call to DEFINE_INLINE_ADD()
. This gives this function access to the inline_add
function. When you compile, all inline_add
calls will be inlined.
loop2.c
#include <stdio.h>
#include "add.h"
/*
 * Prints add(i + 1, i + 2) for i = 0 .. 99999, one value per line.
 * Only the prototype of add() is visible in this file (via add.h), so the
 * normal external function is used. Returns 0.
 */
int loop2(void)
{
register int i;
for (i = 0; i < 100000; i++)
printf("%d\n", add(i + 1, i + 2));
return 0;
}
This is to show you can use the normal version of add
normally from other files.
loop3.c
#include <stdio.h>
#include "add.h"
/* Generate this file's private static inline copy, inline_add(). */
DEFINE_INLINE_ADD()
/*
 * Uses both variants in one compilation unit: two calls to the external
 * add(), then inline_add(i + 1, i + 2) for i = 0 .. 99999. Returns 0.
 */
int loop3(void)
{
register int i;
/* add() here is the normal function declared in add.h */
printf ("add: %d\n", add(2,3));
printf ("add: %d\n", add(4,5));
for (i = 0; i < 100000; i++)
printf("%d\n", inline_add(i + 1, i + 2));
return 0;
}
This is to show that you can use both functions in the same compilation unit, yet one of them will be inlined and the other won't (see the GDB disassembly below for details).
loops.h
/* Prototypes for main: the demo entry points defined in loop.c, loop2.c and loop3.c. */
int loop (void);
int loop2 (void);
int loop3 (void);
main.c
#include <stdio.h>
#include <stdlib.h>
#include "add.h"
#include "loops.h"
/*
 * Prints two results of the normal add(), then runs loop(), loop2() and
 * loop3() in that order. Returns 0.
 */
int main(void)
{
printf("%d\n", add(1,2));
printf("%d\n", add(2,3));
loop();
loop2();
loop3();
return 0;
}
Makefile
CC=gcc
CFLAGS=-Wall -pedantic --std=c11
# Link all objects into the demo binary.
main: add.o loop.o loop2.o loop3.o main.o
	${CC} -o $@ $^ ${CFLAGS}
# Every object lists add.h as a prerequisite: the body of add/inline_add
# lives in that header's macro, so editing it must trigger a rebuild.
# $< (first prerequisite only) keeps the header off the compile line.
add.o: add.c add.h
	${CC} -c $< ${CFLAGS}
# loop.o and loop3.o get -O3 so that __inline__ is honoured.
loop.o: loop.c add.h
	${CC} -c $< -O3 ${CFLAGS}
loop2.o: loop2.c add.h
	${CC} -c $< ${CFLAGS}
loop3.o: loop3.c add.h
	${CC} -c $< -O3 ${CFLAGS}
# main.o is still compiled by make's built-in %.o: %.c rule; this line only
# adds its header prerequisites.
main.o: main.c add.h loops.h
If you use the __attribute__((always_inline))
you can change the Makefile
to:
CC=gcc
CFLAGS=-Wall -pedantic --std=c11
# Link all objects into the demo binary.
main: add.o loop.o loop2.o loop3.o main.o
	${CC} -o $@ $^ ${CFLAGS}
# add.h is a prerequisite of every object because the function body lives in
# its macro; $< passes only the .c file to the compiler.
%.o: %.c add.h
	${CC} -c $< ${CFLAGS}
# main.c also includes loops.h.
main.o: loops.h
Compilation
$ make
gcc -c add.c -Wall -pedantic --std=c11
gcc -c loop.c -O3 -Wall -pedantic --std=c11
gcc -c loop2.c -Wall -pedantic --std=c11
gcc -c loop3.c -O3 -Wall -pedantic --std=c11
gcc -Wall -pedantic --std=c11 -c -o main.o main.c
gcc -o main add.o loop.o loop2.o loop3.o main.o -Wall -pedantic --std=c11
Disassembly
$ gdb main
(gdb) disass add
0x000000000040059d <+0>: push %rbp
0x000000000040059e <+1>: mov %rsp,%rbp
0x00000000004005a1 <+4>: mov %edi,-0x4(%rbp)
0x00000000004005a4 <+7>: mov %esi,-0x8(%rbp)
0x00000000004005a7 <+10>:mov -0x8(%rbp),%eax
0x00000000004005aa <+13>:mov -0x4(%rbp),%edx
0x00000000004005ad <+16>:add %edx,%eax
0x00000000004005af <+18>:pop %rbp
0x00000000004005b0 <+19>:retq
(gdb) disass loop
0x00000000004005c0 <+0>: push %rbx
0x00000000004005c1 <+1>: mov $0x3,%ebx
0x00000000004005c6 <+6>: nopw %cs:0x0(%rax,%rax,1)
0x00000000004005d0 <+16>:mov %ebx,%edx
0x00000000004005d2 <+18>:xor %eax,%eax
0x00000000004005d4 <+20>:mov $0x40079d,%esi
0x00000000004005d9 <+25>:mov $0x1,%edi
0x00000000004005de <+30>:add $0x2,%ebx
0x00000000004005e1 <+33>:callq 0x4004a0 <__printf_chk@plt>
0x00000000004005e6 <+38>:cmp $0x30d43,%ebx
0x00000000004005ec <+44>:jne 0x4005d0 <loop+16>
0x00000000004005ee <+46>:xor %eax,%eax
0x00000000004005f0 <+48>:pop %rbx
0x00000000004005f1 <+49>:retq
(gdb) disass loop2
0x00000000004005f2 <+0>: push %rbp
0x00000000004005f3 <+1>: mov %rsp,%rbp
0x00000000004005f6 <+4>: push %rbx
0x00000000004005f7 <+5>: sub $0x8,%rsp
0x00000000004005fb <+9>: mov $0x0,%ebx
0x0000000000400600 <+14>:jmp 0x400625 <loop2+51>
0x0000000000400602 <+16>:lea 0x2(%rbx),%edx
0x0000000000400605 <+19>:lea 0x1(%rbx),%eax
0x0000000000400608 <+22>:mov %edx,%esi
0x000000000040060a <+24>:mov %eax,%edi
0x000000000040060c <+26>:callq 0x40059d <add>
0x0000000000400611 <+31>:mov %eax,%esi
0x0000000000400613 <+33>:mov $0x400794,%edi
0x0000000000400618 <+38>:mov $0x0,%eax
0x000000000040061d <+43>:callq 0x400470 <printf@plt>
0x0000000000400622 <+48>:add $0x1,%ebx
0x0000000000400625 <+51>:cmp $0x1869f,%ebx
0x000000000040062b <+57>:jle 0x400602 <loop2+16>
0x000000000040062d <+59>:mov $0x0,%eax
0x0000000000400632 <+64>:add $0x8,%rsp
0x0000000000400636 <+68>:pop %rbx
0x0000000000400637 <+69>:pop %rbp
0x0000000000400638 <+70>:retq
(gdb) disass loop3
0x0000000000400640 <+0>: push %rbx
0x0000000000400641 <+1>: mov $0x3,%esi
0x0000000000400646 <+6>: mov $0x2,%edi
0x000000000040064b <+11>:mov $0x3,%ebx
0x0000000000400650 <+16>:callq 0x40059d <add>
0x0000000000400655 <+21>:mov $0x400798,%esi
0x000000000040065a <+26>:mov %eax,%edx
0x000000000040065c <+28>:mov $0x1,%edi
0x0000000000400661 <+33>:xor %eax,%eax
0x0000000000400663 <+35>:callq 0x4004a0 <__printf_chk@plt>
0x0000000000400668 <+40>:mov $0x5,%esi
0x000000000040066d <+45>:mov $0x4,%edi
0x0000000000400672 <+50>:callq 0x40059d <add>
0x0000000000400677 <+55>:mov $0x400798,%esi
0x000000000040067c <+60>:mov %eax,%edx
0x000000000040067e <+62>:mov $0x1,%edi
0x0000000000400683 <+67>:xor %eax,%eax
0x0000000000400685 <+69>:callq 0x4004a0 <__printf_chk@plt>
0x000000000040068a <+74>:nopw 0x0(%rax,%rax,1)
0x0000000000400690 <+80>:mov %ebx,%edx
0x0000000000400692 <+82>:xor %eax,%eax
0x0000000000400694 <+84>:mov $0x40079d,%esi
0x0000000000400699 <+89>:mov $0x1,%edi
0x000000000040069e <+94>:add $0x2,%ebx
0x00000000004006a1 <+97>:callq 0x4004a0 <__printf_chk@plt>
0x00000000004006a6 <+102>:cmp $0x30d43,%ebx
0x00000000004006ac <+108>:jne 0x400690 <loop3+80>
0x00000000004006ae <+110>:xor %eax,%eax
0x00000000004006b0 <+112>:pop %rbx
0x00000000004006b1 <+113>:retq
Symbol table
$ objdump -t main | grep add
0000000000000000 l df *ABS* 0000000000000000 add.c
000000000040059d g F .text 0000000000000014 add
$ objdump -t main | grep loop
0000000000000000 l df *ABS* 0000000000000000 loop.c
0000000000000000 l df *ABS* 0000000000000000 loop2.c
0000000000000000 l df *ABS* 0000000000000000 loop3.c
00000000004005c0 g F .text 0000000000000032 loop
00000000004005f2 g F .text 0000000000000047 loop2
0000000000400640 g F .text 0000000000000072 loop3
$ objdump -t main | grep main
main: file format elf64-x86-64
0000000000000000 l df *ABS* 0000000000000000 main.c
0000000000000000 F *UND* 0000000000000000 __libc_start_main@@GLIBC_2.2.5
00000000004006b2 g F .text 000000000000005a main
$ objdump -t main | grep inline
$
Well, that's it. After 3 hours of banging my head against the keyboard trying to figure it out, this was the best I could come up with. Feel free to point out any errors, I'd really appreciate it. I got really interested in this problem of inlining one particular function call.