2

I wanted to know how methods are implemented in C++ "under the hood". So, I made a simple C++ program which has a class with one non-static field and one non-static, non-virtual method.

Then I instantiated the class in the main function and called the method. I used objdump with the -d option in order to see the CPU instructions of this program. I have an x86-64 processor. Here's the code:

#include<stdio.h>


// Minimal class used to inspect how a non-static, non-virtual member
// function is compiled: one int field and one method that reads it.
class TestClass {
public:
   int x;  // the only data member
   // Returns the value of x plus 2; x itself is not modified.
   int xPlus2(){
       return x + 2;
   }
};

// Creates a TestClass with x = 5, calls xPlus2() on it and prints the result.
int main(){
    TestClass tc1 = {5};  // aggregate initialization: sets tc1.x to 5
    int variable = tc1.xPlus2();  // member call on tc1; yields 5 + 2
    printf("%d \n", variable);  // prints the value, a space, then a newline
    return 0;
}

Here are instructions for the method xPlus2:

  0000000000402c30 <_ZN9TestClass6xPlus2Ev>:
  402c30:   55                      push   %rbp
  402c31:   48 89 e5                mov    %rsp,%rbp
  402c34:   48 89 4d 10             mov    %rcx,0x10(%rbp)
  402c38:   48 8b 45 10             mov    0x10(%rbp),%rax
  402c3c:   8b 00                   mov    (%rax),%eax
  402c3e:   83 c0 02                add    $0x2,%eax
  402c41:   5d                      pop    %rbp
  402c42:   c3                      retq   
  402c43:   90                      nop
  402c44:   90                      nop
  402c45:   90                      nop
  402c46:   90                      nop
  402c47:   90                      nop
  402c48:   90                      nop
  402c49:   90                      nop
  402c4a:   90                      nop
  402c4b:   90                      nop
  402c4c:   90                      nop
  402c4d:   90                      nop
  402c4e:   90                      nop
  402c4f:   90                      nop

If I understand it correctly, these instructions can be replaced by just 3 instructions, because I believe that I don't need to use the stack; I think the compiler used it redundantly:

mov (%rcx), %eax
add $2, %eax
retq

and then maybe I still need lots of nop instructions for synchronization purposes or whatnot. Looking at the CPU instructions, it seems that the value of the field x is stored at the memory location whose address the rcx register holds. You will see the rest of the CPU instructions in a moment. It is a little hard for me to track what happens here (especially what is going on with the call to the __main function); I don't even know which parts of the assembly are important to look at. The compiler produced the main function (as I expected), but it also produced a __main function which is called from main, and there are some weird functions in between those two as well. Here are other parts of the assembly that I think may be interesting:

  0000000000401550 <main>:
  401550:   55                      push   %rbp
  401551:   48 89 e5                mov    %rsp,%rbp
  401554:   48 83 ec 30             sub    $0x30,%rsp
  401558:   e8 e3 00 00 00          callq  401640 <__main>
  40155d:   c7 45 f8 05 00 00 00    movl   $0x5,-0x8(%rbp)
  401564:   48 8d 45 f8             lea    -0x8(%rbp),%rax
  401568:   48 89 c1                mov    %rax,%rcx
  40156b:   e8 c0 16 00 00          callq  402c30 <_ZN9TestClass6xPlus2Ev>
  401570:   89 45 fc                mov    %eax,-0x4(%rbp)
  401573:   8b 45 fc                mov    -0x4(%rbp),%eax
  401576:   89 c2                   mov    %eax,%edx
  401578:   48 8d 0d 81 2a 00 00    lea    0x2a81(%rip),%rcx        # 404000 <.rdata>
  40157f:   e8 ec 14 00 00          callq  402a70 <printf>
  401584:   b8 00 00 00 00          mov    $0x0,%eax
  401589:   48 83 c4 30             add    $0x30,%rsp
  40158d:   5d                      pop    %rbp
  40158e:   c3                      retq   
  40158f:   90                      nop

0000000000401590 <__do_global_dtors>:
  401590:   48 83 ec 28             sub    $0x28,%rsp
  401594:   48 8b 05 75 1a 00 00    mov    0x1a75(%rip),%rax        # 403010 <p.93846>
  40159b:   48 8b 00                mov    (%rax),%rax
  40159e:   48 85 c0                test   %rax,%rax
  4015a1:   74 1d                   je     4015c0 <__do_global_dtors+0x30>
  4015a3:   ff d0                   callq  *%rax
  4015a5:   48 8b 05 64 1a 00 00    mov    0x1a64(%rip),%rax        # 403010 <p.93846>
  4015ac:   48 8d 50 08             lea    0x8(%rax),%rdx
  4015b0:   48 8b 40 08             mov    0x8(%rax),%rax
  4015b4:   48 89 15 55 1a 00 00    mov    %rdx,0x1a55(%rip)        # 403010 <p.93846>
  4015bb:   48 85 c0                test   %rax,%rax
  4015be:   75 e3                   jne    4015a3 <__do_global_dtors+0x13>
  4015c0:   48 83 c4 28             add    $0x28,%rsp
  4015c4:   c3                      retq   
  4015c5:   90                      nop
  4015c6:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  4015cd:   00 00 00 

00000000004015d0 <__do_global_ctors>:
  4015d0:   56                      push   %rsi
  4015d1:   53                      push   %rbx
  4015d2:   48 83 ec 28             sub    $0x28,%rsp
  4015d6:   48 8b 0d 23 2d 00 00    mov    0x2d23(%rip),%rcx        # 404300 <.refptr.__CTOR_LIST__>
  4015dd:   48 8b 11                mov    (%rcx),%rdx
  4015e0:   83 fa ff                cmp    $0xffffffff,%edx
  4015e3:   89 d0                   mov    %edx,%eax
  4015e5:   74 39                   je     401620 <__do_global_ctors+0x50>
  4015e7:   85 c0                   test   %eax,%eax
  4015e9:   74 20                   je     40160b <__do_global_ctors+0x3b>
  4015eb:   89 c2                   mov    %eax,%edx
  4015ed:   83 e8 01                sub    $0x1,%eax
  4015f0:   48 8d 1c d1             lea    (%rcx,%rdx,8),%rbx
  4015f4:   48 29 c2                sub    %rax,%rdx
  4015f7:   48 8d 74 d1 f8          lea    -0x8(%rcx,%rdx,8),%rsi
  4015fc:   0f 1f 40 00             nopl   0x0(%rax)
  401600:   ff 13                   callq  *(%rbx)
  401602:   48 83 eb 08             sub    $0x8,%rbx
  401606:   48 39 f3                cmp    %rsi,%rbx
  401609:   75 f5                   jne    401600 <__do_global_ctors+0x30>
  40160b:   48 8d 0d 7e ff ff ff    lea    -0x82(%rip),%rcx        # 401590 <__do_global_dtors>
  401612:   48 83 c4 28             add    $0x28,%rsp
  401616:   5b                      pop    %rbx
  401617:   5e                      pop    %rsi
  401618:   e9 f3 fe ff ff          jmpq   401510 <atexit>
  40161d:   0f 1f 00                nopl   (%rax)
  401620:   31 c0                   xor    %eax,%eax
  401622:   eb 02                   jmp    401626 <__do_global_ctors+0x56>
  401624:   89 d0                   mov    %edx,%eax
  401626:   44 8d 40 01             lea    0x1(%rax),%r8d
  40162a:   4a 83 3c c1 00          cmpq   $0x0,(%rcx,%r8,8)
  40162f:   4c 89 c2                mov    %r8,%rdx
  401632:   75 f0                   jne    401624 <__do_global_ctors+0x54>
  401634:   eb b1                   jmp    4015e7 <__do_global_ctors+0x17>
  401636:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
  40163d:   00 00 00 

0000000000401640 <__main>:
  401640:   8b 05 ea 59 00 00       mov    0x59ea(%rip),%eax        # 407030 <initialized>
  401646:   85 c0                   test   %eax,%eax
  401648:   74 06                   je     401650 <__main+0x10>
  40164a:   c3                      retq   
  40164b:   0f 1f 44 00 00          nopl   0x0(%rax,%rax,1)
  401650:   c7 05 d6 59 00 00 01    movl   $0x1,0x59d6(%rip)        # 407030 <initialized>
  401657:   00 00 00 
  40165a:   e9 71 ff ff ff          jmpq   4015d0 <__do_global_ctors>
  40165f:   90                      nop
qingy2019
  • 536
  • 7
  • 23
catloverxx
  • 47
  • 8
  • Related/partial duplicate: [How do objects work in x86 at the assembly level?](https://stackoverflow.com/q/33556511) includes an example of calling a non-virtual member function, with `this` as an implicit first arg in terms of the normal C calling convention. – Peter Cordes Mar 02 '22 at 10:03
  • Thank you. When it comes to calling conventions, on my Windows, ecx and edx seem to be where the arguments of a function are placed while on my Ubuntu edi and esi are registers where the arguments of a function are placed. Is that normal? – catloverxx Mar 02 '22 at 17:00
  • Yes. [Why does Windows64 use a different calling convention from all other OSes on x86-64?](https://stackoverflow.com/q/4429398) – Peter Cordes Mar 03 '22 at 01:56

2 Answers

3

I think what you are looking for are these instructions:

 40155d:   c7 45 f8 05 00 00 00    movl   $0x5,-0x8(%rbp)
 401564:   48 8d 45 f8             lea    -0x8(%rbp),%rax
 401568:   48 89 c1                mov    %rax,%rcx
 40156b:   e8 c0 16 00 00          callq  402c30 <_ZN9TestClass6xPlus2Ev>
 401570:   89 45 fc                mov    %eax,-0x4(%rbp)

These match with the code from main:

TestClass tc1 = {5};
int variable = tc1.xPlus2();
  • At address 40155d the field tc1.x is initialized with the value 5.
  • At address 401564 the pointer to tc1 is loaded into the register %rax
  • At address 401568 the pointer to tc1 is copied into the register %rcx
  • At address 40156b is the call of the method tc1.xPlus2()
  • At address 401570 the result is stored in variable
Thomas Kläger
  • 17,754
  • 3
  • 23
  • 34
  • Thank you! This comment answered what I was looking for. I would upvote your answer, but I don't have the reputation to do that; I am a new member of Stack Overflow. – catloverxx Mar 02 '22 at 06:33
  • @catloverxx if this is the answer that you were looking for then you should mark it as the accepted answer. – Thomas Kläger Mar 02 '22 at 07:03
3

Your observations are mostly correct. rcx holds the this pointer to the object on which the method was called. x is the first (and only) member, so it is stored at the very start of the memory that the this pointer points to; that is why rcx is dereferenced and 2 is added to the result. It is the responsibility of the caller to make sure that rcx holds the address of the object before invoking the function. We can see main prepare rcx by setting it to an address in its stack frame (lea -0x8(%rbp),%rax followed by mov %rax,%rcx). You are correct that the compiler produced inefficient code here and did not need to use the stack. Compiling with a higher optimization level (-O1, -O2, or -O3) will likely fix that. The nops have nothing to do with synchronization: they are never executed and are only padding used for function alignment, so that the next function starts on an aligned address. You can mostly ignore __main. It is runtime start-up code: as the listing shows, it runs only once (guarded by the initialized flag) and jumps to __do_global_ctors, which calls the global constructors and registers __do_global_dtors with atexit.

Nathan Farlow
  • 346
  • 1
  • 5
  • Thank you for the answer! It cleared things up. Does this mean that if an API or library asks me to provide a function pointer as an argument, I have no way of "cheating" by providing a method or something similar to a functional interface? The library function will simply not put the "this" pointer in the rcx register and I don't see how I can do anything about it. – catloverxx Mar 02 '22 at 12:10
  • You can technically obtain a pointer to the method, but, exactly as you said, it will not be invoked correctly if passed this way (probably a segfault). Your options are to pass a valid function pointer (a normal function or a static member function of a class), or to pass a lambda, since captureless lambdas convert to function pointers. Note that you will not be able to capture anything with this lambda, because lambdas that capture cannot be converted to function pointers. Sometimes libraries accept a more general "callable" argument, where you have much more flexibility with lambdas and other types. Hope that helps! – Nathan Farlow Mar 02 '22 at 23:07
  • @catloverxx: Right, pointer-to-member-function is a thing in C++, but has a different type from a normal function pointer. The 2nd entry in the FAQ https://isocpp.org/wiki/faq/pointers-to-members is about trying to pass such a pointer as a callback arg that expects a normal function pointer. – Peter Cordes Mar 03 '22 at 02:06
  • @catloverxx: The only plausible hack is if the callback mechanism lets you give it args to pass, and the first one can be a `void*` or `T*`, then you could break the C++ rules if the calling convention for normal functions matches that for member functions. (e.g. x86-64, but not 32-bit Windows using `thiscall` for member functions (`this` in ECX, rest on the stack) but some other convention like `stdcall` for non-member functions.) i.e. if `Foo::bar(int, char)` is the same in asm as `bar(Foo*, int, char)`, then you can potentially get away with it as a non-portable hack. – Peter Cordes Mar 03 '22 at 02:07
  • @catloverxx: But if you can get a callback to pass a `T*` at all, then you can just write a wrapper that calls the member function, and let the member function inline into it. That is fully portable, not tricking the compiler with calling convention hacks. – Peter Cordes Mar 03 '22 at 02:08