0

Hello. When I run C++ code in the CLion IDE debugger, after main() returns the debugger steps into a file called disassembly, which contains what looks like assembly code. What are those instructions? What do they do? Should I care? I'm new to C++, so I'm familiarizing myself with the language, the IDE and anything else of relevance.

; start: tail of the process entry stub as shown by the debugger after main
; has returned. Only these instructions are visible; the call into main is
; not part of this listing.
start:
    nop
    ; AT&T operand order (source, destination): copy %eax into %edi. Under the
    ; SysV AMD64 convention %eax carries an integer return value and %edi the
    ; first integer argument, so this presumably forwards main's return value
    ; as the status passed to exit -- confirm against the full entry stub.
    movl   %eax, %edi
    callq  0x2e82e                    ; symbol stub for: exit
    ; Presumably never reached because exit is not expected to return; hlt
    ; would fault in user mode if it were executed.
    hlt
    ; The nops below are presumably padding up to the next aligned address.
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop


; exit (stub): a single indirect jump through a pointer read from a
; RIP-relative location. Presumably the import stub that forwards to the
; real exit implementation once the pointer has been bound -- confirm.
exit:
    jmpq   *0x268c241c(%rip)


; exit(%edi = status): runs the visible teardown steps in order
; (_tlv_exit, __cxa_finalize(0), an optional global hook) and then calls
; __exit with the original status.
exit:
    ; Standard frame setup; %rbx is callee-saved and is used to keep the
    ; status alive across the calls below. The extra push of %rax is
    ; presumably only there to keep the stack 16-byte aligned.
    pushq  %rbp
    movq   %rsp, %rbp
    pushq  %rbx
    pushq  %rax
    movl   %edi, %ebx
    ; Special case taken only when status == 0xad (173); the meaning of that
    ; constant is not established by this listing.
    cmpl   $0xad, %edi
    jne    0x5a404                    ; <+41>
    ; Load a global pointer; if it is null, continue with the normal path.
    leaq   0x2683a31e(%rip), %rcx
    movq   (%rcx), %rax
    testq  %rax, %rax
    je     0x5a404                    ; <+41>
    ; Swap zero into the global and fetch its previous value in one
    ; instruction (xchg with a memory operand is atomic on x86).
    xorl   %eax, %eax
    xchgq  %rax, (%rcx)
    testq  %rax, %rax
    ; Non-null previous value: go to <+76>, the final "callq *%rax" below.
    jne    0x5a427                    ; <+76>
    ; <+41>: normal teardown path starts here.
    xorl   %eax, %eax
    callq  0x8017c                    ; symbol stub for: _tlv_exit
    ; Call __cxa_finalize with a zero argument.
    xorl   %edi, %edi
    callq  0x5a196                    ; __cxa_finalize
    ; Optional hook: load a global function pointer and call it if non-null.
    movq   0x268354f7(%rip), %rax
    testq  %rax, %rax
    je     0x5a420                    ; <+69>
    callq  *%rax
    ; <+69>: hand the saved status to __exit (presumably does not return).
    movl   %ebx, %edi
    callq  0x8000e                    ; symbol stub for: __exit
    ; <+76>: target of the jne above; calls the pointer that was swapped out
    ; of the global. ud2 raises an invalid-opcode trap if execution gets here.
    callq  *%rax
    ud2

There is also this

; _tlv_exit: fetches a per-thread value via pthread_getspecific using a key
; held in a global. If the value is non-null, clears the slot with
; pthread_setspecific(key, 0) and tail-calls tlv_finalize_list(value);
; otherwise returns immediately.
_tlv_exit:
    ; Frame setup; %rbx (callee-saved) will hold the fetched value. The push
    ; of %rax is presumably stack-alignment padding.
    pushq  %rbp
    movq   %rsp, %rbp
    pushq  %rbx
    pushq  %rax
    ; First argument = key loaded from a global.
    movq   0x268db5e9(%rip), %rdi
    callq  0x2e92a                    ; symbol stub for: pthread_getspecific
    ; Null result: skip to the plain epilogue at <+54>.
    testq  %rax, %rax
    je     0x18e20                    ; <+54>
    movq   %rax, %rbx
    ; Same global key as above (the displacement differs only by the 20 code
    ; bytes in between); second argument is zero, clearing the slot.
    movq   0x268db5d5(%rip), %rdi
    xorl   %esi, %esi
    callq  0x2e942                    ; symbol stub for: pthread_setspecific
    ; Tear down the frame first, then jump (tail call) with the saved value
    ; as the first argument.
    movq   %rbx, %rdi
    addq   $0x8, %rsp
    popq   %rbx
    popq   %rbp
    jmp    0x1983e                    ; tlv_finalize_list
    ; <+54>: nothing to finalize -- ordinary epilogue and return.
    addq   $0x8, %rsp
    popq   %rbx
    popq   %rbp
    retq
    ; The nops below are presumably padding up to the next aligned address.
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop


; start: tail of the process entry stub as shown by the debugger after main
; has returned. Only these instructions are visible; the call into main is
; not part of this listing.
start:
    nop
    ; AT&T operand order (source, destination): copy %eax into %edi. Under the
    ; SysV AMD64 convention %eax carries an integer return value and %edi the
    ; first integer argument, so this presumably forwards main's return value
    ; as the status passed to exit -- confirm against the full entry stub.
    movl   %eax, %edi
    callq  0x2e82e                    ; symbol stub for: exit
    ; Presumably never reached because exit is not expected to return; hlt
    ; would fault in user mode if it were executed.
    hlt
    ; The nops below are presumably padding up to the next aligned address.
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop
    nop


; exit (stub): a single indirect jump through a pointer read from a
; RIP-relative location. Presumably the import stub that forwards to the
; real exit implementation once the pointer has been bound -- confirm.
exit:
    jmpq   *0x268c241c(%rip)


; pthread_getspecific (stub): a single indirect jump through a pointer read
; from a RIP-relative location. Presumably the import stub that forwards to
; the real pthread_getspecific implementation -- confirm.
pthread_getspecific:
    jmpq   *0x268c2470(%rip)


; __cxa_finalize_ranges(%rdi = array of 16-byte {start, length} pairs,
;                       %esi = number of pairs)
;
; Walks a global linked list of blocks while holding a mutex. As read by the
; code below, each block has:
;   +0x00  pointer to the next block
;   +0x08  signed 32-bit entry count
;   +0x10  first of the 32-byte entries; for entry i (byte offset i*32):
;            +0x10 type   (0 = skip; 1, 2, 3 select how the handler is called)
;            +0x18 pointer (function for types 1 and 2; object for type 3)
;            +0x20 argument passed to the function (type 2)
;            +0x28 address tested against the ranges (type 2)
; Entries are visited from the highest index down. With %esi == 0 every
; non-empty entry is run; otherwise only entries whose address falls inside
; one of the ranges (start <= addr < start + length, unsigned).
;
; Register roles: %r13 = current block, %r15 = entry index,
; %r14 = index * 32, %r12d = entry type, %ebx = range count.
__cxa_finalize_ranges:
    ; Prologue: save all callee-saved registers used below, reserve 0x18
    ; bytes of locals.
    pushq  %rbp
    movq   %rsp, %rbp
    pushq  %r15
    pushq  %r14
    pushq  %r13
    pushq  %r12
    pushq  %rbx
    subq   $0x18, %rsp
    ; Spill the arguments: -0x2c(%rbp) = count, -0x38(%rbp) = array pointer.
    movl   %esi, -0x2c(%rbp)
    movq   %rdi, -0x38(%rbp)
    ; Take the mutex (all four lock/unlock sites in this function resolve to
    ; the same RIP-relative address).
    leaq   0x26834d24(%rip), %rdi
    callq  0x804d6                    ; symbol stub for: pthread_mutex_lock
    ; %r13 = list head; empty list goes straight to the unlock at <+383>.
    movq   0x26834ca0(%rip), %r13
    testq  %r13, %r13
    je     0x5a17c                    ; <+383>
    ; %ebx = range count (zero-extended). The stored array pointer is
    ; advanced by 8 so the loops can read start at -8(%rcx) and length
    ; at (%rcx).
    movl   -0x2c(%rbp), %ebx
    addq   $0x8, -0x38(%rbp)
    ; <+60>: per-block entry -- load this block's entry count into %r15.
    movslq 0x8(%r13), %r15
    ; <+64>: per-entry loop head -- no entries left means move on to the
    ; next block at <+370>.
    testq  %r15, %r15
    jle    0x5a16f                    ; <+370>
    ; Step to the next lower index and compute its byte offset (index << 5).
    decq   %r15
    movq   %r15, %r14
    shlq   $0x5, %r14
    ; %r12d = entry type; type 0 entries are skipped.
    movl   0x10(%r13,%r14), %r12d
    testl  %r12d, %r12d
    je     0x5a03d                    ; <+64>
    ; Zero ranges requested: run the entry unconditionally (<+261>).
    cmpl   $0x0, -0x2c(%rbp)
    je     0x5a102                    ; <+261>
    ; Dispatch on type to pick which address is range-checked. Any other
    ; non-zero type falls to <+261> without a range check.
    cmpl   $0x1, %r12d
    je     0x5a0a4                    ; <+167>
    cmpl   $0x3, %r12d
    je     0x5a0d1                    ; <+212>
    cmpl   $0x2, %r12d
    jne    0x5a102                    ; <+261>
    ; Type 2: the address to test is the entry's +0x28 field.
    movq   0x28(%r13,%r14), %rax
    movq   -0x38(%rbp), %rcx
    xorl   %edx, %edx
    ; <+136>: range loop -- %rsi = start; start > addr means try the next
    ; range, otherwise start + length > addr means a match (<+261>).
    movq   -0x8(%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a096                    ; <+153>
    addq   (%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a102                    ; <+261>
    ; <+153>: advance to the next 16-byte range; when all ranges are
    ; exhausted without a match, skip this entry.
    incq   %rdx
    addq   $0x10, %rcx
    cmpq   %rbx, %rdx
    jb     0x5a085                    ; <+136>
    jmp    0x5a03d                    ; <+64>
    ; <+167>, type 1: the address to test is the entry's +0x18 pointer.
    movq   0x18(%r13,%r14), %rax
    movq   -0x38(%rbp), %rcx
    xorl   %edx, %edx
    ; <+178>: same range loop as above.
    movq   -0x8(%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a0c0                    ; <+195>
    addq   (%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a102                    ; <+261>
    ; <+195>: next range, or skip the entry when none matched.
    incq   %rdx
    addq   $0x10, %rcx
    cmpq   %rbx, %rdx
    jb     0x5a0af                    ; <+178>
    jmp    0x5a03d                    ; <+64>
    ; <+212>, type 3: the address to test is read from offset 0x10 of the
    ; object that the entry's +0x18 pointer refers to.
    movq   0x18(%r13,%r14), %rax
    movq   0x10(%rax), %rax
    movq   -0x38(%rbp), %rcx
    xorl   %edx, %edx
    ; <+227>: same range loop as above.
    movq   -0x8(%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a0f1                    ; <+244>
    addq   (%rcx), %rsi
    cmpq   %rax, %rsi
    ja     0x5a102                    ; <+261>
    ; <+244>: next range, or skip the entry when none matched.
    incq   %rdx
    addq   $0x10, %rcx
    cmpq   %rbx, %rdx
    jb     0x5a0e0                    ; <+227>
    jmp    0x5a03d                    ; <+64>
    ; <+261>: run this entry. Its type is zeroed first so it is skipped on
    ; any later pass, a global flag byte is cleared, and the mutex is
    ; released before the handler is called.
    leaq   0x10(%r13,%r14), %rax
    movl   $0x0, (%rax)
    movb   $0x0, 0x26834b94(%rip)
    leaq   0x26834c25(%rip), %rdi
    callq  0x804e2                    ; symbol stub for: pthread_mutex_unlock
    ; Call shape depends on the type captured in %r12d before it was zeroed.
    cmpl   $0x1, %r12d
    je     0x5a13e                    ; <+321>
    cmpl   $0x3, %r12d
    je     0x5a145                    ; <+328>
    cmpl   $0x2, %r12d
    jne    0x5a14d                    ; <+336>
    ; Type 2: call the +0x18 function with the +0x20 field as its argument.
    movq   0x20(%r13,%r14), %rdi
    callq  *0x18(%r13,%r14)
    jmp    0x5a14d                    ; <+336>
    ; <+321>, type 1: call the +0x18 function with no argument set up.
    callq  *0x18(%r13,%r14)
    jmp    0x5a14d                    ; <+336>
    ; <+328>, type 3: pass the +0x18 object as the first argument and call
    ; the function pointer stored at offset 0x10 inside it.
    movq   0x18(%r13,%r14), %rdi
    callq  *0x10(%rdi)
    ; <+336>: re-take the mutex after the handler has run.
    leaq   0x26834bec(%rip), %rdi
    callq  0x804d6                    ; symbol stub for: pthread_mutex_lock
    ; The flag byte cleared before the call is re-read: still zero means
    ; carry on with the next lower index in the same block. Non-zero means
    ; something set it in the meantime (presumably a new registration made
    ; while the handler ran -- confirm), so restart from the list head.
    cmpb   $0x0, 0x26834b48(%rip)
    je     0x5a03d                    ; <+64>
    movq   0x26834b5b(%rip), %r13
    jmp    0x5a173                    ; <+374>
    ; <+370>: follow the next-block pointer.
    movq   (%r13), %r13
    ; <+374>: continue with the block in %r13, or finish at end of list.
    testq  %r13, %r13
    jne    0x5a039                    ; <+60>
    ; <+383>: load the mutex address, restore callee-saved registers, and
    ; tail-call pthread_mutex_unlock so it returns directly to our caller.
    leaq   0x26834bbd(%rip), %rdi
    addq   $0x18, %rsp
    popq   %rbx
    popq   %r12
    popq   %r13
    popq   %r14
    popq   %r15
    popq   %rbp
    jmp    0x804e2                    ; symbol stub for: pthread_mutex_unlock


; __cxa_finalize(%rdi = pointer, may be zero): thin wrapper around
; __cxa_finalize_ranges.
;   - zero argument:     tail-calls __cxa_finalize_ranges(0, 0)
;   - non-zero argument: builds one 16-byte pair {argument, 1} on the stack
;                        and calls __cxa_finalize_ranges(&pair, 1)
__cxa_finalize:
    ; Zero argument takes the frameless tail-call path at <+47>.
    testq  %rdi, %rdi
    je     0x5a1c5                    ; <+47>
    ; Frame with 16 bytes of locals for the pair.
    pushq  %rbp
    movq   %rsp, %rbp
    subq   $0x10, %rsp
    ; pair[0] = argument, pair[1] = 1.
    leaq   -0x10(%rbp), %rax
    movq   %rdi, (%rax)
    movq   $0x1, 0x8(%rax)
    ; Arguments: %rdi = &pair, %esi = 1 (one pair).
    movq   %rax, %rdi
    movl   $0x1, %esi
    callq  0x59ffd                    ; __cxa_finalize_ranges
    addq   $0x10, %rsp
    popq   %rbp
    retq
    ; <+47>: both arguments zero; jump instead of call so the callee returns
    ; directly to our caller.
    xorl   %edi, %edi
    xorl   %esi, %esi
    jmp    0x59ffd                    ; __cxa_finalize_ranges


; exit(%edi = status): runs the visible teardown steps in order
; (_tlv_exit, __cxa_finalize(0), an optional global hook) and then calls
; __exit with the original status.
exit:
    ; Standard frame setup; %rbx is callee-saved and is used to keep the
    ; status alive across the calls below. The extra push of %rax is
    ; presumably only there to keep the stack 16-byte aligned.
    pushq  %rbp
    movq   %rsp, %rbp
    pushq  %rbx
    pushq  %rax
    movl   %edi, %ebx
    ; Special case taken only when status == 0xad (173); the meaning of that
    ; constant is not established by this listing.
    cmpl   $0xad, %edi
    jne    0x5a404                    ; <+41>
    ; Load a global pointer; if it is null, continue with the normal path.
    leaq   0x2683a31e(%rip), %rcx
    movq   (%rcx), %rax
    testq  %rax, %rax
    je     0x5a404                    ; <+41>
    ; Swap zero into the global and fetch its previous value in one
    ; instruction (xchg with a memory operand is atomic on x86).
    xorl   %eax, %eax
    xchgq  %rax, (%rcx)
    testq  %rax, %rax
    ; Non-null previous value: go to <+76>, the final "callq *%rax" below.
    jne    0x5a427                    ; <+76>
    ; <+41>: normal teardown path starts here.
    xorl   %eax, %eax
    callq  0x8017c                    ; symbol stub for: _tlv_exit
    ; Call __cxa_finalize with a zero argument.
    xorl   %edi, %edi
    callq  0x5a196                    ; __cxa_finalize
    ; Optional hook: load a global function pointer and call it if non-null.
    movq   0x268354f7(%rip), %rax
    testq  %rax, %rax
    je     0x5a420                    ; <+69>
    callq  *%rax
    ; <+69>: hand the saved status to __exit (presumably does not return).
    movl   %ebx, %edi
    callq  0x8000e                    ; symbol stub for: __exit
    ; <+76>: target of the jne above; calls the pointer that was swapped out
    ; of the global. ud2 raises an invalid-opcode trap if execution gets here.
    callq  *%rax
    ud2


; _tlv_exit (stub): a single indirect jump through a pointer read from a
; RIP-relative location. Presumably the import stub that forwards to the
; real _tlv_exit implementation -- confirm.
_tlv_exit:
    jmpq   *0x2680cbd6(%rip)


; pthread_getspecific(%rdi = key): returns in %rax the 8-byte value stored
; at offset key * 8 from the %gs segment base. No frame and no calls.
; NOTE(review): %gs presumably points at the current thread's data area on
; this platform -- confirm.
pthread_getspecific:
    movq   %gs:(,%rdi,8), %rax
    retq
  • Is this "How can I read assembly output?" If so you need to look at the documentation for the ISA you're compiling for (e.g. x86_64 or x86). – tadman Aug 11 '20 at 04:58
  • I don't need specifics, I just need to understand the general overview and significance of this step. –  Aug 11 '20 at 05:02
  • This is just so you can see how it was compiled which can shed light on optimization problems. It's also educational in that you can see how your C++ code translates, roughly speaking, to machine code. – tadman Aug 11 '20 at 05:02
  • So, are you suggesting that I should learn how to read assembly code for improving c++ programs that I may write in the future? –  Aug 11 '20 at 05:05
  • 1
    "disassembly" isn't a filename, it's what the debugger does if it doesn't have high-level source. (instead it shows you a text representation of the machine code; there's always machine code because that's what the CPU runs. You can ask a debugger to show you disassembly for any function, instead of or as well as the high-level source.) – Peter Cordes Aug 11 '20 at 05:07
  • 1
    It's not necessary out of the gate, but if you're ever trying to get to the bottom of a tricky performance problem you'll often want to look at the assembly output to see if the compiler is doing what you think it's doing. That's a more advanced sort of thing, and unless you're writing performance-critical code where you're shaving nanoseconds off of something it's unlikely to be important. – tadman Aug 11 '20 at 05:08
  • Are these files equivalent to `.pyc` files in python? –  Aug 11 '20 at 05:09
  • @PeterCordes does the debugger run the code twice (once stepping through the C++ and a second time stepping through the disassembly), or is this just the program ending? –  Aug 11 '20 at 05:12
  • [Here's a question](https://stackoverflow.com/questions/63351841/why-does-gcc-delete-my-code-on-o3-but-not-on-o0) involving the use of such output. – tadman Aug 11 '20 at 05:12
  • re: do *you* need to know asm: no, but understanding the kinds of things that asm instructions can do can be helpful in thinking about how computers work, like what kinds of things a CPU can do efficiently or not. [Why do we even need assembler when we have compiler?](https://stackoverflow.com/q/51780158). re: understanding `main`'s caller: unnecessary, usually even undefined behaviour bugs don't lead to weird stuff happening in this code; you either return to it ok or you crash. But see [Return vs Exit from main function in C](//stackoverflow.com/posts/comments/111950517) for asm info – Peter Cordes Aug 11 '20 at 05:14
  • 1
    @sK500: The debugger itself doesn't *run* your code per se; it just controls execution of it in a separate process with help from the kernel, and CPU hardware single-step / breakpoint. You can single step by asm instruction (GDB `stepi` / `si`) or by source line (GDB `step` / `s`). It can't run at all until after a compiler turns your high-level source into machine code, but debug info tells the debugger which location in the machine code is the start of the block for each new source line. (It's only that simple in debug builds, like `gcc -O0`; otherwise code from separate statements can be mixed.) – Peter Cordes Aug 11 '20 at 05:16
  • 1
    Related: [Why does clang produce inefficient asm with -O0 (for this simple floating point sum)?](https://stackoverflow.com/q/53366394). (Also re: asm in general: [How to remove "noise" from GCC/clang assembly output?](https://stackoverflow.com/q/38552116) - especially the link to Matt Godbolt's talk.) – Peter Cordes Aug 11 '20 at 05:17

2 Answers

2

Assembly output is just a dump of the executable code the compiler generated, but in a human-readable form¹. This is not actually used by the compiler; it's just an artifact of the compilation process to be used for reference.

Remember, the compiled executable can be converted into assembly code at any time, tools like IDA Pro and Ghidra excel at doing this on any executable, but the compiler can add in contextual information that's lost in the final compilation phase in the form of comments or useful labels for things.

The compiler often emits debug hints for your compiled executable so it can turn a stack-trace into something that maps back to your original source code. These artifacts are much more useful as they allow you to step through C++ code instead of assembly code. If you ever have to debug in a library you don't have the source for you'll be stuck stepping through an assembly view of the executable code.


¹ Presuming you can read assembly code.

tadman
  • 208,517
  • 23
  • 234
  • 262
  • 1
    thanks, that's very helpful I will check what those instructions mean –  Aug 11 '20 at 05:26
2

The code you posted is support code from your libc runtime. The runtime is responsible for, among other things:

  • implementing atexit hooks;
  • setting up your IO streams (cin, cout);
  • running constructors of any global static variables.

This answer has a more complete overview. You can search for articles about libc_start_main and related functions to learn more.

Botje
  • 26,269
  • 3
  • 31
  • 41