1

I decided to compile a very basic C program and take a look at the generated code with objdump -d.

/* Minimal test program: main does not use argc/argv and terminates by calling exit(0). */
int main(int argc, char *argv[]) {
    /* NOTE(review): no #include <stdlib.h> is shown in this snippet, so exit()
       is presumably implicitly declared here -- confirm against the full file. */
    exit(0);
}

After compiling it with gcc test.c -s -o test.o and then disassembling with objdump -d my text segment looked like this:

Disassembly of section .text:

0000000000001050 <.text>:
    1050:       31 ed                   xor    %ebp,%ebp
    1052:       49 89 d1                mov    %rdx,%r9
    1055:       5e                      pop    %rsi
    1056:       48 89 e2                mov    %rsp,%rdx
    1059:       48 83 e4 f0             and    $0xfffffffffffffff0,%rsp
    105d:       50                      push   %rax
    105e:       54                      push   %rsp
    105f:       4c 8d 05 4a 01 00 00    lea    0x14a(%rip),%r8        # 11b0 <__cxa_finalize@plt+0x170>
    1066:       48 8d 0d e3 00 00 00    lea    0xe3(%rip),%rcx        # 1150 <__cxa_finalize@plt+0x110>
    106d:       48 8d 3d c1 00 00 00    lea    0xc1(%rip),%rdi        # 1135 <__cxa_finalize@plt+0xf5>
    1074:       ff 15 66 2f 00 00       callq  *0x2f66(%rip)        # 3fe0 <__cxa_finalize@plt+0x2fa0>
    107a:       f4                      hlt    
    107b:       0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
    1080:       48 8d 3d a9 2f 00 00    lea    0x2fa9(%rip),%rdi        # 4030 <__cxa_finalize@plt+0x2ff0>
    1087:       48 8d 05 a2 2f 00 00    lea    0x2fa2(%rip),%rax        # 4030 <__cxa_finalize@plt+0x2ff0>
    108e:       48 39 f8                cmp    %rdi,%rax
    1091:       74 15                   je     10a8 <__cxa_finalize@plt+0x68>
    1093:       48 8b 05 3e 2f 00 00    mov    0x2f3e(%rip),%rax        # 3fd8 <__cxa_finalize@plt+0x2f98>
    109a:       48 85 c0                test   %rax,%rax
    109d:       74 09                   je     10a8 <__cxa_finalize@plt+0x68>
    109f:       ff e0                   jmpq   *%rax
    10a1:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
    10a8:       c3                      retq   
    10a9:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
    10b0:       48 8d 3d 79 2f 00 00    lea    0x2f79(%rip),%rdi        # 4030 <__cxa_finalize@plt+0x2ff0>
    10b7:       48 8d 35 72 2f 00 00    lea    0x2f72(%rip),%rsi        # 4030 <__cxa_finalize@plt+0x2ff0>
    10be:       48 29 fe                sub    %rdi,%rsi
    10c1:       48 c1 fe 03             sar    $0x3,%rsi
    10c5:       48 89 f0                mov    %rsi,%rax
    10c8:       48 c1 e8 3f             shr    $0x3f,%rax
    10cc:       48 01 c6                add    %rax,%rsi
    10cf:       48 d1 fe                sar    %rsi
    10d2:       74 14                   je     10e8 <__cxa_finalize@plt+0xa8>
    10d4:       48 8b 05 15 2f 00 00    mov    0x2f15(%rip),%rax        # 3ff0 <__cxa_finalize@plt+0x2fb0>
    10db:       48 85 c0                test   %rax,%rax
    10de:       74 08                   je     10e8 <__cxa_finalize@plt+0xa8>
    10e0:       ff e0                   jmpq   *%rax
    10e2:       66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
    10e8:       c3                      retq   
    10e9:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
    10f0:       80 3d 39 2f 00 00 00    cmpb   $0x0,0x2f39(%rip)        # 4030 <__cxa_finalize@plt+0x2ff0>
    10f7:       75 2f                   jne    1128 <__cxa_finalize@plt+0xe8>
    10f9:       55                      push   %rbp
    10fa:       48 83 3d f6 2e 00 00    cmpq   $0x0,0x2ef6(%rip)        # 3ff8 <__cxa_finalize@plt+0x2fb8>
    1101:       00 
    1102:       48 89 e5                mov    %rsp,%rbp
    1105:       74 0c                   je     1113 <__cxa_finalize@plt+0xd3>
    1107:       48 8b 3d 1a 2f 00 00    mov    0x2f1a(%rip),%rdi        # 4028 <__cxa_finalize@plt+0x2fe8>
    110e:       e8 2d ff ff ff          callq  1040 <__cxa_finalize@plt>
    1113:       e8 68 ff ff ff          callq  1080 <__cxa_finalize@plt+0x40>
    1118:       c6 05 11 2f 00 00 01    movb   $0x1,0x2f11(%rip)        # 4030 <__cxa_finalize@plt+0x2ff0>
    111f:       5d                      pop    %rbp
    1120:       c3                      retq   
    1121:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
    1128:       c3                      retq   
    1129:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
    1130:       e9 7b ff ff ff          jmpq   10b0 <__cxa_finalize@plt+0x70>
    1135:       55                      push   %rbp
    1136:       48 89 e5                mov    %rsp,%rbp
    1139:       48 83 ec 10             sub    $0x10,%rsp
    113d:       89 7d fc                mov    %edi,-0x4(%rbp)
    1140:       48 89 75 f0             mov    %rsi,-0x10(%rbp)
    1144:       bf 00 00 00 00          mov    $0x0,%edi
    1149:       e8 e2 fe ff ff          callq  1030 <exit@plt>
    114e:       66 90                   xchg   %ax,%ax
    1150:       41 57                   push   %r15
    1152:       4c 8d 3d 8f 2c 00 00    lea    0x2c8f(%rip),%r15        # 3de8 <__cxa_finalize@plt+0x2da8>
    1159:       41 56                   push   %r14
    115b:       49 89 d6                mov    %rdx,%r14
    115e:       41 55                   push   %r13
    1160:       49 89 f5                mov    %rsi,%r13
    1163:       41 54                   push   %r12
    1165:       41 89 fc                mov    %edi,%r12d
    1168:       55                      push   %rbp
    1169:       48 8d 2d 80 2c 00 00    lea    0x2c80(%rip),%rbp        # 3df0 <__cxa_finalize@plt+0x2db0>
    1170:       53                      push   %rbx
    1171:       4c 29 fd                sub    %r15,%rbp
    1174:       48 83 ec 08             sub    $0x8,%rsp
    1178:       e8 83 fe ff ff          callq  1000 <exit@plt-0x30>
    117d:       48 c1 fd 03             sar    $0x3,%rbp
    1181:       74 1b                   je     119e <__cxa_finalize@plt+0x15e>
    1183:       31 db                   xor    %ebx,%ebx
    1185:       0f 1f 00                nopl   (%rax)
    1188:       4c 89 f2                mov    %r14,%rdx
    118b:       4c 89 ee                mov    %r13,%rsi
    118e:       44 89 e7                mov    %r12d,%edi
    1191:       41 ff 14 df             callq  *(%r15,%rbx,8)
    1195:       48 83 c3 01             add    $0x1,%rbx
    1199:       48 39 dd                cmp    %rbx,%rbp
    119c:       75 ea                   jne    1188 <__cxa_finalize@plt+0x148>
    119e:       48 83 c4 08             add    $0x8,%rsp
    11a2:       5b                      pop    %rbx
    11a3:       5d                      pop    %rbp
    11a4:       41 5c                   pop    %r12
    11a6:       41 5d                   pop    %r13
    11a8:       41 5e                   pop    %r14
    11aa:       41 5f                   pop    %r15
    11ac:       c3                      retq   
    11ad:       0f 1f 00                nopl   (%rax)
    11b0:       c3                      retq   

As you can see, the part that was actually written by me occupies very little space. The same program (if we ignore the fact that the main function is also treated as a function in C) in Assembly:

# Minimal freestanding x86-64 program (GNU as, AT&T syntax): terminates with a
# raw syscall instead of going through the C library.
.global _start

.text
_start: mov     $60, %rax               # rax = syscall number 60 (0x3c); the exit call the question refers to
        xor     %rdi, %rdi              # rdi = 0: first syscall argument, i.e. the exit status
        syscall                         # enter the kernel; no code follows, so this is expected not to return

Assembled, linked and disassembled with gcc -c demo.s && ld demo.o -o demo && objdump -d demo:

Disassembly of section .text:

0000000000401000 <_start>:
  401000:       48 c7 c0 3c 00 00 00    mov    $0x3c,%rax
  401007:       48 31 ff                xor    %rdi,%rdi
  40100a:       0f 05                   syscall 

The question is: what purpose do all these instructions serve and is there a way to generate code without them?

While I was writing the question I noticed that the C program calls exit() from the linked library whereas in Assembly I do it directly with a syscall. I don't think it is important in this case though.

  • 1
    It’s not optimized so there’s going to be a lot of stuff. If you take the optimized version you’ll see something very different. – Sami Kuhmonen Dec 20 '19 at 17:47
  • 4
    Those are parts of the C library. You can ask for `-nostdlib` but of course then you don't get to use `exit`. Obviously you should also enable optimization `-O2`. – Jester Dec 20 '19 at 17:49
  • @Jester compiled with `gcc test.c -nostdlib -e main -O2 -o test ` but `size test` tells me the size of the .text segment is 184 bytes but disassembly with objdump shows just 2 instructions: `xor %eax, %eax` and `retq`. What bloats it now? –  Dec 20 '19 at 17:57
  • 1
    Try `size --format=SysV` instead. Also add `-no-pie` to `gcc` in case that's not the default on your system. Other options worth adding are `-fno-unwind-tables -fno-asynchronous-unwind-tables -fno-exceptions` – Jester Dec 20 '19 at 18:04
  • @Jester `--format=SysV` did the trick and `-no-pie` reduced the size of the executable even further. Now I will have to learn what PIE is. Thank you. –  Dec 20 '19 at 18:08
  • 2
    Note that as you passed `-s` to the compiler, you won't be able to distinguish the code generated for `main` from the runtime support code linked into the program. Note further that your assembly program does not actually do the same thing as it does not run any `atexit` triggers or flushes stdio streams on exit nor initialises or deinitialises any shared libraries that could have been added during linking. Taking care of this is one reason the C program compiles to more code. – fuz Dec 20 '19 at 19:39
  • 1
    PIE = position independent executable. Read about [Position Independent Code](https://en.wikipedia.org/wiki/Position-independent_code) – Basile Starynkevitch Dec 21 '19 at 12:14

3 Answers

2

gcc generates unnecessary (?) instructions

Yes, because you invoked GCC without asking for any compiler optimizations.

My recommendation: compile with

gcc -fverbose-asm -O2 -S test.c

then look inside the generated test.s assembler code.

BTW, most of that code comes from crt0, which is provided by gcc as prebuilt startup code, not emitted by it when compiling your program. Build your executable with gcc -O2 -v test.c -o testprog to understand what GCC really does. Read the documentation of GCC internals.

Since GCC is free software, you are allowed to look inside its source code and improve it. But the crt0 stuff is tricky, and operating system specific.

Consider also reading about linkers and loaders, about ELF executables, and How to write shared libraries, and the Linux Assembler HowTo.

Basile Starynkevitch
  • 223,805
  • 18
  • 296
  • 547
1

gcc -s strips symbol names out of the final executable so you can't tell where different parts of the machine code came from.

Most of it is not from your main. To see just your own code, look at the gcc -S output (asm source), e.g. on https://godbolt.org/. See also: How to remove "noise" from GCC/clang assembly output?


Most of that is the CRT (C RunTime) startup code that eventually calls your main after initializing the standard library. (e.g. allocating memory for stdio buffers and so on.) It gets linked in regardless of how efficient your main is. e.g. compiling an empty int main(void){} with gcc -Os (optimize for size) will barely make it any smaller.

You could in theory compile with gcc -nostdlib and write your own _start that uses inline asm to make an exit system call.

See also

Peter Cordes
  • 328,167
  • 45
  • 605
  • 847
0

A C program does a lot of work before calling the main function. It has to initialize the .data and .bss segments, set up the stack, go through the constructors and destructors (yes, gcc has special attributes for such functions in C), and initialize the library.

gcc destructor and constructor functions:

void __attribute__ ((constructor)) funcname(void);

void __attribute__ ((destructor)) funcname(void);

You may have as many constructors and destructors as you wish.

Constructors are called before the call to the main function; destructors are called on exit from the program (after main terminates).

https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Function-Attributes.html#Function-Attributes

0___________
  • 60,014
  • 4
  • 34
  • 74
  • Could you elaborate? What exactly do those instructions do? Especially the calls to __cxa_finalize –  Dec 20 '19 at 18:00
  • this one calls destructors – 0___________ Dec 20 '19 at 18:01
  • but what is there to be destructed? –  Dec 20 '19 at 18:03
  • @Jester I am learning and this is why I asked this question. I have programming experience in C but I am not exactly familiar with compiler peculiarities. –  Dec 20 '19 at 18:11
  • 1
    @Wyris Just do not focus on it for now. It is not needed for "normal" programming. The code surrounding the main function is also strongly implementation and target dependent. Focus on the code you write – 0___________ Dec 20 '19 at 18:21
  • 2
    Note that on hosted systems, the data and bss segments are generally initialised by the operating system. @Wyris In your program, there is nothing to be destructed, but the compiler has no way to know as you could have linked in a shared library doing such a thing. – fuz Dec 20 '19 at 19:42
  • @fuz this was only an example. Usually much more is happening there as well – 0___________ Dec 20 '19 at 22:24
  • 1
    @P__J__ Sure, but the one example you gave is something that generally does not happen. – fuz Dec 21 '19 at 15:06