Please take a look at the GCC manual to learn more about GCC's optimization flags and optimization levels. To sum up, -O2 changes the code that is generated for your program and, among other things, applies loop optimizations to it. I generated two assembly files using GCC: the first one without any optimization, and the second one with the optimization level set to -O2.
with no optimization
Take a look at the following excerpt of the generated assembly; the full file can be found at this link:
# main.cpp:16: for (__int64 c = 0; c < LLONG_MAX; c++)
movl $0, -48(%ebp) #, c
movl $0, -44(%ebp) #, c
L10:
# main.cpp:16: for (__int64 c = 0; c < LLONG_MAX; c++)
movl -48(%ebp), %eax # c, tmp108
xorl $-1, %eax #, tmp108
movl %eax, -60104(%ebp) # tmp108, %sfp
movl -44(%ebp), %eax # c, tmp109
xorl $2147483647, %eax #, tmp109
movl %eax, -60100(%ebp) # tmp109, %sfp
movl -60104(%ebp), %ecx # %sfp, tmp107
movl -60100(%ebp), %ebx # %sfp,
movl %ebx, %eax #, tmp110
orl %ecx, %eax # tmp107, tmp110
testl %eax, %eax # tmp110
je L7 #,
# main.cpp:18: for (int i = 1; i < MAX_VAL - 1; i++)
movl $1, -52(%ebp) #, i
L9:
# main.cpp:18: for (int i = 1; i < MAX_VAL - 1; i++)
cmpl $14998, -52(%ebp) #, i
jg L8 #,
# main.cpp:20: arr[i] += arr[i + 1];
movl -52(%ebp), %eax # i, tmp111
movl -60088(%ebp,%eax,4), %edx # arr, _1
# main.cpp:20: arr[i] += arr[i + 1];
movl -52(%ebp), %eax # i, tmp112
addl $1, %eax #, _2
# main.cpp:20: arr[i] += arr[i + 1];
movl -60088(%ebp,%eax,4), %eax # arr, _3
# main.cpp:20: arr[i] += arr[i + 1];
addl %eax, %edx # _3, _4
movl -52(%ebp), %eax # i, tmp113
movl %edx, -60088(%ebp,%eax,4) # _4, arr
# main.cpp:21: arr[i - 1] -= arr[i];
You will find comment lines such as # main.cpp:16: for (__int64 c = 0; c < LLONG_MAX; c++)
which indicate the source line that the following assembly instructions were generated from. You will also find a lot of jump instructions (jmp and its conditional variants)
and labels, which implement the loops. By the way, to generate this file, use the following command in your cmd or terminal: g++ -fverbose-asm -S main.cpp -o main.s
using -O2 optimization
Take a look at the following excerpt of the generated assembly; the full file can be found at this link:
# main.cpp:6: int main() {
call ___main #
# main.cpp:10: std::cout << "Measuring array...\n";
movl $LC0, 4(%esp) #,
movl $__ZSt4cout, (%esp) #,
call __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc #
# main.cpp:11: start = std::chrono::steady_clock::now();
call __ZNSt6chrono3_V212steady_clock3nowEv #
movl %eax, %esi # tmp92, start
movl %edx, %edi #, start
# main.cpp:27: end = std::chrono::steady_clock::now();
call __ZNSt6chrono3_V212steady_clock3nowEv #
# c:\mingw\lib\gcc\mingw32\9.2.0\include\c++\ostream:202: { return _M_insert(__n); }
movl $__ZSt4cout, %ecx #,
# c:\mingw\lib\gcc\mingw32\9.2.0\include\c++\chrono:469: return __cd(__cd(__lhs).count() - __cd(__rhs).count());
subl %esi, %eax # start, tmp89
sbbl %edi, %edx # start,
# c:\mingw\lib\gcc\mingw32\9.2.0\include\c++\ostream:202: { return _M_insert(__n); }
movl %eax, (%esp) # tmp89,
movl %edx, 4(%esp) #,
call __ZNSo9_M_insertIxEERSoT_ #
subl $8, %esp #,
# main.cpp:29: std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << "\n";
movl $LC1, 4(%esp) #,
movl %eax, (%esp) # tmp94,
call __ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc #
# main.cpp:32: }
You will notice that this file is smaller in terms of lines of code and that the program runs faster, because in the optimized version the compiler removed all of your loops: there are no nested loops left, and you will find only one jmp
instruction in this file. So yes, the -O2 optimization does affect your code. By the way, to generate this file, use the following command in your cmd or terminal: g++ -O2 -fverbose-asm -S main.cpp -o main.s
solving this problem
To prevent the compiler from optimizing a part of your code away, you can move that code into a separate function and mark the function with the keyword volatile,
which is intended to stop the compiler from optimizing that part of the code. volatile
can also be applied to variables, not only to functions, to protect them from optimization; refer to GeeksforGeeks for more examples and information. For your code, you can protect it from optimization like this, although it is not the best practice:
#include <iostream>
#include <chrono>
#include <climits>
// Times the nested-loop workload and prints the elapsed time in nanoseconds,
// followed by a newline, to std::cout.
//
// The `volatile` on the return type is kept so the signature is unchanged,
// but a qualifier on a `void` return type has no effect on how the body is
// compiled (GCC reports it under -Wignored-qualifiers). What actually keeps
// the loops from being removed is the volatile array below: every read and
// write of a volatile object is an observable side effect that the compiler
// must perform.
//
// NOTE: each of the three outer loops counts up to LLONG_MAX, so once the
// loops are really executed this function will not finish in any practical
// amount of time.
volatile void func()
{
    // Local constant instead of a #define, so no macro leaks out of the function.
    constexpr int MAX_VAL = 15000;

    // Zero-initialised so the loop never reads indeterminate values; with all
    // zeros the arithmetic below also cannot overflow.
    volatile int arr[MAX_VAL] = {};

    const auto start = std::chrono::steady_clock::now();

    // `long long` is the type of LLONG_MAX and, unlike `__int64`, is portable.
    for (long long a = 0; a < LLONG_MAX; a++)
    {
        for (long long b = 0; b < LLONG_MAX; b++)
        {
            for (long long c = 0; c < LLONG_MAX; c++)
            {
                for (int i = 1; i < MAX_VAL - 1; i++)
                {
                    // Spelled out as plain assignments: compound assignment
                    // on a volatile operand is deprecated in C++20.
                    arr[i] = arr[i] + arr[i + 1];
                    arr[i - 1] = arr[i - 1] - arr[i];
                    arr[i] = arr[i] * 2;
                }
            }
        }
    }

    const auto end = std::chrono::steady_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count() << "\n";
}
// Program entry point: prints a banner, then runs the timed workload.
int main()
{
    const char* const banner = "Measuring array...\n";
    std::cout << banner;

    func();
    // Falling off the end of main is equivalent to `return 0;`.
}