EDIT: Changed the MWE to avoid too much optimization, which had reduced the handwritten version to `1`.
So after reading this discussion here: Implementing comparison operators via 'tuple' and 'tie', a good idea?, I wanted to check one of the statements that was made in the comments (JohannesD):
Was positively surprised to see that the compiler inlined and optimized out everything having to do with tuples and references, emitting assembly almost identical to hand-written code.
Question 1: How to properly check this statement?
So here is what I tried: Since I had never touched or looked at assembly before, I wanted to have the simplest code example possible. However, I also wanted to compare the compiler-optimized assembly versions, so as not to get clutter from non-inlined functions etc. The result was that the compiler basically optimized my entire MWE away. Therefore, I added the `cout`. But this of course introduces some unwanted clutter to the assembly.
Question 2: What are neat tricks to keep the code from being optimized away completely without introducing too much new code?
So my MWE at the moment is the one you find below. Using Visual Studio in release mode I compiled the code and used this to see the assembly: How to view the assembly behind the code using Visual C++?. First I made sure that the C++14 suggestion (the `auto tied()...` line) yields the exact same assembly code as the one without this extra function, which it does. Then I wanted to compare it to the handwritten code, but I get completely different assembly output, or so it seems, since I don't really know much about assembly.
Question 3: I see similar lines between the two versions but lots of additional lines in the one that uses tuple. What do these additional lines do?
Question 4: Length of assembly does not equal slowness of code. OK, but how much can one actually read out of these assemblies about the performance of the two versions?
MWE - EDIT: Now without getting optimized away hopefully
#include <tuple>
#include <iostream>
// Four-int record whose ordering is delegated to std::tuple's lexicographic
// comparison, with the fields ranked z, then y, then x, then d.
struct item_new {
    int x, y, z, d;

    item_new(int lx = 0, int ly = 0, int lz = 0, int ld = 0)
        : x(lx), y(ly), z(lz), d(ld) {}

    // Tuple of references to the members, listed in comparison priority order.
    auto tied() const { return std::tie(z, y, x, d); }

    // Strict "less than": compares the tied views of both operands.
    bool operator<(const item_new &rhs) const {
        const auto lhs_key = tied();
        const auto rhs_key = rhs.tied();
        return lhs_key < rhs_key;
    }
};
// Four-int record with a hand-written lexicographic ordering over
// z, then y, then x, then d (no std::tuple involved).
struct item {
    int x, y, z, d;

    item(int lx = 0, int ly = 0, int lz = 0, int ld = 0)
        : x(lx), y(ly), z(lz), d(ld) {}

    // Strict "less than": the first field that differs, in priority order,
    // decides the result; if all higher-priority fields tie, d decides.
    bool operator<(const item &rhs) const {
        if (z != rhs.z)
            return z < rhs.z;
        if (y != rhs.y)
            return y < rhs.y;
        if (x != rhs.x)
            return x < rhs.x;
        return d < rhs.d;
    }
};
// Reads four integers from standard input, builds an item from them,
// compares it against the fixed item (7, 8, 9, 0) and prints the result.
// Taking the operands from runtime input keeps the optimizer from folding
// the whole comparison into a constant.
// Returns 0 on success, 1 if the four integers could not be read.
int main()
{
    // Zero-initialize: once one extraction fails, later extractions leave
    // their targets untouched, so the values would otherwise be indeterminate.
    int x = 0, y = 0, z = 0, d = 0;
    if (!(std::cin >> x >> y >> z >> d)) {
        std::cerr << "expected four integers on standard input\n";
        return 1;
    }

    item a(x, y, z, d), b(7, 8, 9, 0);
    //item_new a(x,y,z,d), b(7,8,9,0);
    bool result = a < b;
    std::cout << result;
    return 0;
}
Assembly of handwritten version
int main()
{
012B1002 in al,dx
012B1003 and esp,0FFFFFFF8h
012B1006 sub esp,18h
012B1009 mov eax,dword ptr ds:[012B3004h]
012B100E xor eax,esp
012B1010 mov dword ptr [esp+14h],eax
int x,y,z,d;
std::cin >> x;
012B1014 mov ecx,dword ptr ds:[12B2038h]
012B101A lea eax,[esp+0Ch]
012B101E push eax
012B101F call dword ptr ds:[12B2034h]
std::cin >> y;
012B1025 mov ecx,dword ptr ds:[12B2038h]
012B102B lea eax,[esp+8]
012B102F push eax
012B1030 call dword ptr ds:[12B2034h]
std::cin >> z;
012B1036 mov ecx,dword ptr ds:[12B2038h]
012B103C lea eax,[esp+4]
012B1040 push eax
012B1041 call dword ptr ds:[12B2034h]
std::cin >> d;
012B1047 mov ecx,dword ptr ds:[12B2038h]
std::cin >> d;
012B104D lea eax,[esp+10h]
012B1051 push eax
012B1052 call dword ptr ds:[12B2034h]
item a(x, y, z, d), b(7, 8, 9, 0);
//item_new a(x,y,z,d), b(7,8,9,0);
bool result = a<b;
012B1058 mov eax,dword ptr [esp+4]
012B105C cmp eax,9
012B105F jge main+67h (012B1067h)
012B1061 mov byte ptr [esp],1
012B1065 jmp main+8Eh (012B108Eh)
012B1067 jne main+8Ah (012B108Ah)
012B1069 mov eax,dword ptr [esp+8]
012B106D cmp eax,8
012B1070 jl main+61h (012B1061h)
012B1072 jne main+8Ah (012B108Ah)
012B1074 mov eax,dword ptr [esp+0Ch]
012B1078 cmp eax,7
012B107B jl main+61h (012B1061h)
012B107D jne main+8Ah (012B108Ah)
012B107F cmp dword ptr [esp+10h],0
012B1084 mov byte ptr [esp],1
012B1088 jl main+8Eh (012B108Eh)
012B108A mov byte ptr [esp],0
std::cout << result;
012B108E push dword ptr [esp]
012B1091 mov ecx,dword ptr ds:[12B2040h]
012B1097 call dword ptr ds:[12B203Ch]
return 0;
}
012B109D mov ecx,dword ptr [esp+14h]
012B10A1 xor eax,eax
012B10A3 xor ecx,esp
012B10A5 call __security_check_cookie (012B10AEh)
012B10AA mov esp,ebp
012B10AC pop ebp
012B10AD ret
Assembly of version using std::tie
int main()
{
00F41002 in al,dx
00F41003 and esp,0FFFFFFF8h
00F41006 sub esp,3Ch
00F41009 mov eax,dword ptr ds:[00F43004h]
00F4100E xor eax,esp
00F41010 mov dword ptr [esp+38h],eax
int x,y,z,d;
std::cin >> x;
00F41014 mov ecx,dword ptr ds:[0F42038h]
00F4101A lea eax,[esp+8]
00F4101E push esi
00F4101F push eax
00F41020 call dword ptr ds:[0F42034h]
std::cin >> y;
00F41026 mov ecx,dword ptr ds:[0F42038h]
00F4102C lea eax,[esp+10h]
00F41030 push eax
00F41031 call dword ptr ds:[0F42034h]
std::cin >> z;
00F41037 mov ecx,dword ptr ds:[0F42038h]
00F4103D lea eax,[esp+14h]
00F41041 push eax
00F41042 call dword ptr ds:[0F42034h]
std::cin >> d;
00F41048 mov ecx,dword ptr ds:[0F42038h]
00F4104E lea eax,[esp+18h]
00F41052 push eax
00F41053 call dword ptr ds:[0F42034h]
//item a(x, y, z, d), b(7, 8, 9, 0);
item_new a(x,y,z,d), b(7,8,9,0);
00F41059 mov edx,dword ptr [esp+14h]
00F4105D movaps xmm0,xmmword ptr ds:[0F42110h]
00F41064 mov eax,dword ptr [esp+0Ch]
00F41068 mov ecx,dword ptr [esp+10h]
00F4106C mov esi,dword ptr [esp+18h]
00F41070 mov dword ptr [esp+1Ch],eax
00F41074 mov dword ptr [esp+20h],ecx
00F41078 mov dword ptr [esp+24h],edx
00F4107C mov dword ptr [esp+28h],esi
00F41080 movups xmmword ptr [esp+2Ch],xmm0
bool result = a<b;
00F41085 cmp edx,9
00F41088 jl main+0A5h (0F410A5h)
00F4108A jg main+9Eh (0F4109Eh)
00F4108C cmp ecx,8
00F4108F jl main+0A5h (0F410A5h)
00F41091 jg main+9Eh (0F4109Eh)
00F41093 cmp eax,7
00F41096 jl main+0A5h (0F410A5h)
00F41098 jg main+9Eh (0F4109Eh)
00F4109A test esi,esi
00F4109C js main+0A5h (0F410A5h)
00F4109E mov byte ptr [esp+8],0
00F410A3 jmp main+0AAh (0F410AAh)
00F410A5 mov byte ptr [esp+8],1
std::cout << result;
00F410AA push dword ptr [esp+8]
00F410AE mov ecx,dword ptr ds:[0F42040h]
00F410B4 call dword ptr ds:[0F4203Ch]
return 0;
}
00F410BA mov ecx,dword ptr [esp+3Ch]
00F410BE xor eax,eax
00F410C0 pop esi
00F410C1 xor ecx,esp
00F410C3 call __security_check_cookie (0F410CCh)
00F410C8 mov esp,ebp
00F410CA pop ebp
00F410CB ret