I decided to compile a very basic C program and take a look at the generated code with objdump -d
.
int main(int argc, char *argv[]) {
exit(0);
}
After compiling it with gcc test.c -s -o test.o
and then disassembling with objdump -d
my text segment looked like this:
Disassembly of section .text:
0000000000001050 <.text>:
1050: 31 ed xor %ebp,%ebp
1052: 49 89 d1 mov %rdx,%r9
1055: 5e pop %rsi
1056: 48 89 e2 mov %rsp,%rdx
1059: 48 83 e4 f0 and $0xfffffffffffffff0,%rsp
105d: 50 push %rax
105e: 54 push %rsp
105f: 4c 8d 05 4a 01 00 00 lea 0x14a(%rip),%r8 # 11b0 <__cxa_finalize@plt+0x170>
1066: 48 8d 0d e3 00 00 00 lea 0xe3(%rip),%rcx # 1150 <__cxa_finalize@plt+0x110>
106d: 48 8d 3d c1 00 00 00 lea 0xc1(%rip),%rdi # 1135 <__cxa_finalize@plt+0xf5>
1074: ff 15 66 2f 00 00 callq *0x2f66(%rip) # 3fe0 <__cxa_finalize@plt+0x2fa0>
107a: f4 hlt
107b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
1080: 48 8d 3d a9 2f 00 00 lea 0x2fa9(%rip),%rdi # 4030 <__cxa_finalize@plt+0x2ff0>
1087: 48 8d 05 a2 2f 00 00 lea 0x2fa2(%rip),%rax # 4030 <__cxa_finalize@plt+0x2ff0>
108e: 48 39 f8 cmp %rdi,%rax
1091: 74 15 je 10a8 <__cxa_finalize@plt+0x68>
1093: 48 8b 05 3e 2f 00 00 mov 0x2f3e(%rip),%rax # 3fd8 <__cxa_finalize@plt+0x2f98>
109a: 48 85 c0 test %rax,%rax
109d: 74 09 je 10a8 <__cxa_finalize@plt+0x68>
109f: ff e0 jmpq *%rax
10a1: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10a8: c3 retq
10a9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10b0: 48 8d 3d 79 2f 00 00 lea 0x2f79(%rip),%rdi # 4030 <__cxa_finalize@plt+0x2ff0>
10b7: 48 8d 35 72 2f 00 00 lea 0x2f72(%rip),%rsi # 4030 <__cxa_finalize@plt+0x2ff0>
10be: 48 29 fe sub %rdi,%rsi
10c1: 48 c1 fe 03 sar $0x3,%rsi
10c5: 48 89 f0 mov %rsi,%rax
10c8: 48 c1 e8 3f shr $0x3f,%rax
10cc: 48 01 c6 add %rax,%rsi
10cf: 48 d1 fe sar %rsi
10d2: 74 14 je 10e8 <__cxa_finalize@plt+0xa8>
10d4: 48 8b 05 15 2f 00 00 mov 0x2f15(%rip),%rax # 3ff0 <__cxa_finalize@plt+0x2fb0>
10db: 48 85 c0 test %rax,%rax
10de: 74 08 je 10e8 <__cxa_finalize@plt+0xa8>
10e0: ff e0 jmpq *%rax
10e2: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
10e8: c3 retq
10e9: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
10f0: 80 3d 39 2f 00 00 00 cmpb $0x0,0x2f39(%rip) # 4030 <__cxa_finalize@plt+0x2ff0>
10f7: 75 2f jne 1128 <__cxa_finalize@plt+0xe8>
10f9: 55 push %rbp
10fa: 48 83 3d f6 2e 00 00 cmpq $0x0,0x2ef6(%rip) # 3ff8 <__cxa_finalize@plt+0x2fb8>
1101: 00
1102: 48 89 e5 mov %rsp,%rbp
1105: 74 0c je 1113 <__cxa_finalize@plt+0xd3>
1107: 48 8b 3d 1a 2f 00 00 mov 0x2f1a(%rip),%rdi # 4028 <__cxa_finalize@plt+0x2fe8>
110e: e8 2d ff ff ff callq 1040 <__cxa_finalize@plt>
1113: e8 68 ff ff ff callq 1080 <__cxa_finalize@plt+0x40>
1118: c6 05 11 2f 00 00 01 movb $0x1,0x2f11(%rip) # 4030 <__cxa_finalize@plt+0x2ff0>
111f: 5d pop %rbp
1120: c3 retq
1121: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
1128: c3 retq
1129: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
1130: e9 7b ff ff ff jmpq 10b0 <__cxa_finalize@plt+0x70>
1135: 55 push %rbp
1136: 48 89 e5 mov %rsp,%rbp
1139: 48 83 ec 10 sub $0x10,%rsp
113d: 89 7d fc mov %edi,-0x4(%rbp)
1140: 48 89 75 f0 mov %rsi,-0x10(%rbp)
1144: bf 00 00 00 00 mov $0x0,%edi
1149: e8 e2 fe ff ff callq 1030 <exit@plt>
114e: 66 90 xchg %ax,%ax
1150: 41 57 push %r15
1152: 4c 8d 3d 8f 2c 00 00 lea 0x2c8f(%rip),%r15 # 3de8 <__cxa_finalize@plt+0x2da8>
1159: 41 56 push %r14
115b: 49 89 d6 mov %rdx,%r14
115e: 41 55 push %r13
1160: 49 89 f5 mov %rsi,%r13
1163: 41 54 push %r12
1165: 41 89 fc mov %edi,%r12d
1168: 55 push %rbp
1169: 48 8d 2d 80 2c 00 00 lea 0x2c80(%rip),%rbp # 3df0 <__cxa_finalize@plt+0x2db0>
1170: 53 push %rbx
1171: 4c 29 fd sub %r15,%rbp
1174: 48 83 ec 08 sub $0x8,%rsp
1178: e8 83 fe ff ff callq 1000 <exit@plt-0x30>
117d: 48 c1 fd 03 sar $0x3,%rbp
1181: 74 1b je 119e <__cxa_finalize@plt+0x15e>
1183: 31 db xor %ebx,%ebx
1185: 0f 1f 00 nopl (%rax)
1188: 4c 89 f2 mov %r14,%rdx
118b: 4c 89 ee mov %r13,%rsi
118e: 44 89 e7 mov %r12d,%edi
1191: 41 ff 14 df callq *(%r15,%rbx,8)
1195: 48 83 c3 01 add $0x1,%rbx
1199: 48 39 dd cmp %rbx,%rbp
119c: 75 ea jne 1188 <__cxa_finalize@plt+0x148>
119e: 48 83 c4 08 add $0x8,%rsp
11a2: 5b pop %rbx
11a3: 5d pop %rbp
11a4: 41 5c pop %r12
11a6: 41 5d pop %r13
11a8: 41 5e pop %r14
11aa: 41 5f pop %r15
11ac: c3 retq
11ad: 0f 1f 00 nopl (%rax)
11b0: c3 retq
As you can see, the part that was actually written by me occupies very little space. The same program (if we ignore the fact that the main function is also treated as a function in C) in Assembly:
.global _start
.text
_start: mov $60, %rax
xor %rdi, %rdi
syscall
Assembled, linked and disassembled with gcc -c demo.s && ld demo.o -o demo && objdump -d demo
:
Disassembly of section .text:
0000000000401000 <_start>:
401000: 48 c7 c0 3c 00 00 00 mov $0x3c,%rax
401007: 48 31 ff xor %rdi,%rdi
40100a: 0f 05 syscall
The question is: what purpose do all these instructions serve and is there a way to generate code without them?
While I was writing the question I noticed that the C program calls exit() from the linked library whereas in Assembly I do it directly with a syscall. I don't think it is important in this case though.