I have written some C++ code to time plain C++ statements against equivalent inline assembler code. I was initially just having fun with it, but then I noticed that every time I ran my program, I got different results. Sometimes the C++ was faster, sometimes the inline assembler code was faster, and sometimes they were all the same.
What is going on here?
Here's the code with the program output:
#define TRIALS 1000000
#include <chrono>
#include <iostream>
using namespace std;
typedef std::chrono::high_resolution_clock Clock;
int main()
{
auto t1 = Clock::now();
auto t2 = Clock::now();
int X3=17;
int X2=17;
int X4=17;
int X=17;
int sum=0;
int avg=0;
cout << "=================================" << endl;
cout << "| var*=10; |" << endl;
cout << "=================================" << endl;
for( int i=0; i<TRIALS; i++ )
{
X3=17;
t1 = Clock::now();
X3*=10;
t2 = Clock::now();
sum+=chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
avg=sum/TRIALS;
cout << "| Product: " << X3<< " "<< avg << " nanoseconds |" << endl;
cout << "=================================" << endl;
cout << endl << endl;
avg=sum=0;
cout << "=================================" << endl;
cout << "| use inline assembler with shl |" << endl;
cout << "=================================" << endl;
for( int i=0; i<TRIALS; i++ )
{
X=17;
t1 = Clock::now();
asm /*volatile*/ (
"movl %0, %%eax;" // X->ax
"shll %%eax;"// ax*=2
"movl %%eax, %%ebx;" // ax->bx
"shll %%eax;" // ax*=2
"shll %%eax;" // ax*=2
"add %%ebx, %%eax;" // bx+ax->ax
: "=a" (X)
: "a" (X)
: "%ebx"
);
t2 = Clock::now();
sum+=chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
avg=sum/TRIALS;
cout << "| Product: " << X << " "<< avg << " nanoseconds |" << endl;
cout << "=================================" << endl;
cout << endl << endl;
avg=sum=0;
cout << "=================================" << endl;
cout << "| var=var*10 |" << endl;
cout << "=================================" << endl;
for( int i=0; i<TRIALS; i++ )
{
X2=17;
t1 = Clock::now();
X2=X2*10;
t2 = Clock::now();
sum+=chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
avg=sum/TRIALS;
cout << "| Product: " << X3<< " "<< avg << " nanoseconds |" << endl;
cout << "=================================" << endl;
cout << endl << endl;
avg=sum=0;
cout << "=================================" << endl;
cout << "| use inline assembler with mul |" << endl;
cout << "=================================" << endl;
for( int i=0; i<TRIALS; i++ )
{
X4=17;
t1 = Clock::now();
asm (
"movl %0, %%eax;" // X->ax
"movl $0x0A, %%ebx;" // 10->bx
"mul %%ebx;" // 10*ax->ax
: "=a" (X4)
: "a" (X4)
: "%ebx"
);
t2 = Clock::now();
sum+=chrono::duration_cast<std::chrono::nanoseconds>(t2 - t1).count();
}
avg=sum/TRIALS;
cout << "| Product: " << X4<< " "<< avg << " nanoseconds |" << endl;
cout << "=================================" << endl;
cout << endl;
return(0);
}
PROGRAM OUTPUT #1:
=================================
| var*=10; |
=================================
| Product: 170 50 nanoseconds |
=================================
=================================
| use inline assembler with shl |
=================================
| Product: 170 50 nanoseconds |
=================================
=================================
| var=var*10 |
=================================
| Product: 170 50 nanoseconds |
=================================
=================================
| use inline assembler with mul |
=================================
| Product: 170 50 nanoseconds |
=================================
OUTPUT #2:
=================================
| var*=10; |
=================================
| Product: 170 62 nanoseconds |
=================================
=================================
| use inline assembler with shl |
=================================
| Product: 170 57 nanoseconds |
=================================
=================================
| var=var*10 |
=================================
| Product: 170 59 nanoseconds |
=================================
=================================
| use inline assembler with mul |
=================================
| Product: 170 58 nanoseconds |
=================================