The following C code should simply execute p
times the same assembly code, which in turns should only decrease of the ecx
register in sixteen loops from 16 to 0.
When p
is small, the program completes quickly, but when p
is high (say p = 16
), its execution time increases exponentially.
#include <stdio.h>
#include <stdlib.h>
int main() {
int p = 16;
int i;
for(i=0; i<p; i++) {
int c = 16;
__asm__(
"mov %[c], %%rcx \n"
"loop: \n"
"sub $1, %%rcx \n"
"jnz loop \n"
:
: [c]"m" (c)
: "rcx"
);
}
return 0;
}
Strangely enough, when adding some lines to measure the execution time, the program completes as fast as expected, without any exponential increase effect:
#include <stdio.h>
#include <stdlib.h>
#include <time.h> //added
int main() {
int p = 16;
int i;
clock_t start, end; //added
start = clock(); //added
for(i=0; i<p; i++) {
int c = 16;
__asm__(
"mov %[c], %%rcx \n"
"loop: \n"
"sub $1, %%rcx \n"
"jnz loop \n"
:
: [c]"m" (c)
: "rcx"
);
}
end = clock(); //added
float time = (float)(end - start)/CLOCKS_PER_SEC; //added
printf("Time spent: %f\n", time); //added
return 0;
}
How to avoid such an issue?