OK, I've been talking to a friend about compilers and the optimisation of programs, and he suggested that `n * 0.5` is faster than `n / 2`. I said that compilers do that kind of optimisation automatically, so I wrote a small program to see whether there was a difference between `n / 2` and `n * 0.5`:
Division:
#include <stdio.h>
#include <time.h>
/*
 * Division benchmark: runs m iterations of `n = i / 2`, measures the loop
 * with clock(), and prints the iteration count, the elapsed time in seconds
 * and the last value stored in n. argc and argv are not used.
 */
int main(int argc, const char * argv[]) {
int i, m;
float n, s;
clock_t t;
m = 1000000000;
/* Clock reading taken immediately before the loop. */
t = clock();
for(i = 0; i < m; i++) {
/* i and 2 are both int, so this is an integer division; the truncated
   quotient is converted to float only when it is assigned to n. */
n = i / 2;
}
/* Tick difference converted to seconds. Nothing inside the loop depends on
   a previous iteration, and only the final value of n is read afterwards
   (by the printf below). */
s = (float)(clock() - t) / CLOCKS_PER_SEC;
printf("n = i / 2: %d calculations took %f seconds (last calculation = %f)\n", m, s, n);
return 0;
}
Multiplication:
#include <stdio.h>
#include <time.h>
/*
 * Multiplication benchmark: runs m iterations of `n = i * 0.5`, measures the
 * loop with clock(), and prints the iteration count, the elapsed time in
 * seconds and the last value stored in n. argc and argv are not used.
 */
int main(int argc, const char * argv[]) {
int i, m;
float n, s;
clock_t t;
m = 1000000000;
/* Clock reading taken immediately before the loop. */
t = clock();
for(i = 0; i < m; i++) {
/* 0.5 is a double literal, so i is converted to double for the multiply and
   the product is narrowed to float when it is assigned to n. */
n = i * 0.5;
}
/* Tick difference converted to seconds. Nothing inside the loop depends on
   a previous iteration, and only the final value of n is read afterwards
   (by the printf below). */
s = (float)(clock() - t) / CLOCKS_PER_SEC;
printf("n = i * 0.5: %d calculations took %f seconds (last calculation = %f)\n", m, s, n);
return 0;
}
For both versions I got an average of 0.000002 s when compiled with `clang main.c -O1`. He said there must be something wrong with the time measurement, so he then wrote this program:
#include <cstdio>
#include <iostream>
#include <ctime>
using namespace std;
// Benchmark driver: runs four loops of m iterations each and prints the time
// measured with clock() for each one, in this order:
//   1. independent divisions        2. independent multiplications
//   3. dependent (chained) divisions 4. dependent (chained) multiplications
// The values stored in n..s are never printed or otherwise used after the
// loops; only the timings are reported.
int main()
{
clock_t ts, te;
double dT;
int i, m;
double n, o, p, q, r, s;
m = 1000000000;
cout << "Independent calculations:\n";
ts = clock();
for (i = 0; i < m; i++)
{
// Six unrelated double divisions. Both operands of every expression are
// literals, so each right-hand side is a constant expression and no
// int-to-floating-point conversion is involved.
n = 11.1 / 2.3;
o = 22.2 / 2.3;
p = 33.3 / 2.3;
q = 44.4 / 2.3;
r = 55.5 / 2.3;
s = 66.6 / 2.3;
}
te = clock();
dT = ((float)(te - ts)) / CLOCKS_PER_SEC; // clock ticks for the loop above, converted to seconds
// ts is re-armed before the printf below, so the next measured interval also
// includes the time spent in that printf.
ts = clock();
printf("Division: %d calculations took %f seconds\n", m, dT);
for (i = 0; i < m; i++)
{
// Same shape as the division loop, but multiplying literals by 0.53.
// Note that 0.53 is not the reciprocal of 2.3 (1 / 2.3 is about 0.435), so
// the values computed here differ from those in the division loop.
n = 11.1 * 0.53;
o = 22.2 * 0.53;
p = 33.3 * 0.53;
q = 44.4 * 0.53;
r = 55.5 * 0.53;
s = 66.6 * 0.53;
}
te = clock();
dT = ((float)(te - ts)) / CLOCKS_PER_SEC; // clock ticks since ts (printf + loop above), converted to seconds
// Re-armed before the printf and cout below, so both are part of the next
// measured interval.
ts = clock();
printf("Multiplication: %d calculations took %f seconds\n", m, dT);
cout << "\nDependent calculations:\n";
for (i = 0; i < m; i++)
{
// Chain of six double divisions: each result is the input of the next one.
// n is reset from literals at the top of every iteration, so the chain
// computes the same six values on each pass.
n = 11.1 / 2.3;
o = n / 2.3;
p = o / 2.3;
q = p / 2.3;
r = q / 2.3;
s = r / 2.3;
}
te = clock();
dT = ((float)(te - ts)) / CLOCKS_PER_SEC; // clock ticks since ts (output calls + loop above), converted to seconds
// Re-armed before the printf below, so it is part of the next measured interval.
ts = clock();
printf("Division: %d calculations took %f seconds\n", m, dT);
for (i = 0; i < m; i++)
{
// Chain of six double multiplications by 0.53, restarted from literals on
// every iteration. As above, 0.53 is not 1 / 2.3, so this is not the
// numerical equivalent of the division chain.
n = 11.1 * 0.53;
o = n * 0.53;
p = o * 0.53;
q = p * 0.53;
r = q * 0.53;
s = r * 0.53;
}
te = clock();
dT = ((float)(te - ts)) / CLOCKS_PER_SEC; // clock ticks since ts (printf + loop above), converted to seconds
// This last reading of the clock is never used.
ts = clock();
printf("Multiplication: %d calculations took %f seconds\n", m, dT);
return 0;
}
And for that he got...
1.869570s
1.868254s
25.674016s
3.497555s
...in that order.
So I ran his program on my machine, compiled with `clang++ main.cpp -O1`, and got results similar to before: 0.000002 to 0.000011 seconds.
However, when I compiled the program without optimisation, I got results similar to his on his first test. So my question is: how can any amount of optimisation make the program that much faster?