I have noticed very strange behavior when compiling a simple program with the -O2
optimization flag: the loop below should stop after ten iterations, but it keeps printing far past that.
#include <iostream>
// Minimal reproduction: streams the loop counter to std::cout, one value per line.
int main() {
// The bound is the constant 10, so the body is expected to run for i = 0 .. 9 only.
for (int i = 0; i < 10; i++) {
// std::endl writes a newline and flushes the stream on every iteration.
std::cout << i << std::endl;
}
return 0;
}
program output:
1
2
3
4
5
6
7
...
1228881
...
The program works as expected with the -O3
or -Ofast
flags.
output of running clang++ -O2 -v test.cpp
:
vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.co\
m/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
"/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell
A curious point is that removing << std::endl
makes the problem go away. Likewise, replacing the std::cout
line with a printf
call works as expected. Update: removing the << i
but keeping the << std::endl
also works - 10 blank lines are printed without fail! I am flummoxed here; any help would be greatly appreciated!
PS: Note that I built clang++ myself, following this guide: https://btorpey.github.io/blog/2015/01/02/building-clang/ - I have been using it to compile basic programs with no such issues for more than a year.
Update:
output of clang++ -O2 -S test.cpp
.text
.file "test.cpp"
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# main as emitted by clang++ -O2 (x86-64, AT&T syntax, ELF).
# Register roles visible in this listing:
#   %r14d - loop counter: zeroed on entry, passed to _ZNSolsEi, incremented by 1 per pass
#   %rbx  - value returned by _ZNSolsEi (std::ostream::operator<<(int)); later the
#           first argument of _ZNSo3putEc (std::ostream::put(char))
#   %rbp  - pointer loaded from the stream object and null-checked before use; the
#           compiler's block labels identify it as a std::ctype<char> facet
# Returns 0 in %eax through .LBB0_6.
main: # @main
.cfi_startproc
# %bb.0: # %entry
# Save the three callee-saved registers used below. Three 8-byte pushes on top of the
# return address leave %rsp 16-byte aligned at every call in the loop.
pushq %rbp
.cfi_def_cfa_offset 16
pushq %r14
.cfi_def_cfa_offset 24
pushq %rbx
.cfi_def_cfa_offset 32
.cfi_offset %rbx, -32
.cfi_offset %r14, -24
.cfi_offset %rbp, -16
# counter = 0, then enter the loop at its header.
xorl %r14d, %r14d
jmp .LBB0_1
.p2align 4, 0x90
.LBB0_4: # %if.end.i
# in Loop: Header=BB0_1 Depth=1
# Slow path, taken when the byte at 56(%rbp) is zero: call
# std::ctype<char>::_M_widen_init() on the facet, then make an indirect call through
# the pointer at offset 48 of the table addressed by (%rbp), passing 10 ('\n') in %esi.
# NOTE(review): offsets 48/56/67/240 are library-internal layout details - not
# verifiable from this listing alone.
movq %rbp, %rdi
callq _ZNKSt5ctypeIcE13_M_widen_initEv
movq (%rbp), %rax
movq %rbp, %rdi
movl $10, %esi
callq *48(%rax)
.LBB0_5: # %_ZNKSt5ctypeIcE5widenEc.exit
# in Loop: Header=BB0_1 Depth=1
# %al holds the character to emit: sign-extend it, write it with put(), then flush
# the stream that put() returned.
movsbl %al, %esi
movq %rbx, %rdi
callq _ZNSo3putEc
movq %rax, %rdi
callq _ZNSo5flushEv
# counter += 1
addl $1, %r14d
# NOTE(review): this is the only loop-exit test, and it compares the counter with
# -101 using an equality branch. The C++ source for this listing loops while
# i < 10, so the bound encoded here does not match the source; the loop is left
# only when the counter equals -101.
cmpl $-101, %r14d
je .LBB0_6
.LBB0_1: # %for.body
# =>This Inner Loop Header: Depth=1
# std::cout << counter
movl $_ZSt4cout, %edi
movl %r14d, %esi
callq _ZNSolsEi
# Keep the returned stream in %rbx, then load a pointer from it: read the 8-byte value
# stored 24 bytes before the address held in the stream's first word, add it to the
# stream address, and load the pointer at offset 240 from there.
movq %rax, %rbx
movq (%rax), %rax
movq -24(%rax), %rax
movq 240(%rbx,%rax), %rbp
# A null pointer is reported through std::__throw_bad_cast() at .LBB0_7.
testq %rbp, %rbp
je .LBB0_7
# %bb.2: # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
# in Loop: Header=BB0_1 Depth=1
# Byte at 56(%rbp) == 0 selects the slow path above; otherwise use the fast path.
cmpb $0, 56(%rbp)
je .LBB0_4
# %bb.3: # %if.then.i
# in Loop: Header=BB0_1 Depth=1
# Fast path: take the character directly from the byte at 67(%rbp).
movzbl 67(%rbp), %eax
jmp .LBB0_5
.LBB0_6: # %for.cond.cleanup
# return 0, restoring the saved registers in reverse push order.
xorl %eax, %eax
popq %rbx
.cfi_def_cfa_offset 24
popq %r14
.cfi_def_cfa_offset 16
popq %rbp
.cfi_def_cfa_offset 8
retq
.LBB0_7: # %if.then.i10
.cfi_def_cfa_offset 32
# No instruction follows this call inside the function, so it is not expected to return.
callq _ZSt16__throw_bad_castv
.Lfunc_end0:
.size main, .Lfunc_end0-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Static-initialization routine for test.cpp in the -O2 build: constructs the
# file-local _ZStL8__ioinit object with std::ios_base::Init::Init(), then registers
# std::ios_base::Init::~Init() for it through __cxa_atexit.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
# The push only moves %rsp by 8 so the stack is 16-byte aligned at the call below;
# the value pushed is not used.
pushq %rax
.cfi_def_cfa_offset 16
movl $_ZStL8__ioinit, %edi
callq _ZNSt8ios_base4InitC1Ev
# Arguments for __cxa_atexit: destructor address, object address, __dso_handle.
movl $_ZNSt8ios_base4InitD1Ev, %edi
movl $_ZStL8__ioinit, %esi
movl $__dso_handle, %edx
# Undo the alignment push, then jump instead of call so that __cxa_atexit returns
# directly to this routine's caller.
popq %rax
.cfi_def_cfa_offset 8
jmp __cxa_atexit # TAILCALL
.Lfunc_end1:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout
And, for reference, the output of clang++ -S test.cpp (no optimization flag):
.text
.file "test.cpp"
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function __cxx_global_var_init
.type __cxx_global_var_init,@function
# Unoptimized initializer for the file-local _ZStL8__ioinit object: calls
# std::ios_base::Init::Init() on it, then registers std::ios_base::Init::~Init()
# for it through __cxa_atexit. Called from _GLOBAL__sub_I_test.cpp.
__cxx_global_var_init: # @__cxx_global_var_init
.cfi_startproc
# %bb.0: # %entry
# Standard frame-pointer prologue.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
movabsq $_ZStL8__ioinit, %rdi
callq _ZNSt8ios_base4InitC1Ev
# Arguments for __cxa_atexit: destructor address (staged through %rax), object
# address, __dso_handle.
movabsq $_ZNSt8ios_base4InitD1Ev, %rax
movq %rax, %rdi
movabsq $_ZStL8__ioinit, %rsi
movabsq $__dso_handle, %rdx
callq __cxa_atexit
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end0:
.size __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
.cfi_endproc
# -- End function
.text
.globl main # -- Begin function main
.p2align 4, 0x90
.type main,@function
# main as emitted by clang++ without an optimization flag (x86-64, AT&T syntax).
# Stack slots relative to %rbp:
#   -8(%rbp) - loop counter: initialised to 0, compared with 10, incremented by 1
#   -4(%rbp) - written with 0 on entry and never read in this listing
# Returns 0 in %eax.
main: # @main
.cfi_startproc
# %bb.0: # %entry
# Frame-pointer prologue plus 16 bytes of locals.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
subq $16, %rsp
movl $0, -4(%rbp)
movl $0, -8(%rbp)
.LBB1_1: # %for.cond
# =>This Inner Loop Header: Depth=1
# Loop condition: leave the loop once the counter is >= 10 (signed compare), i.e. the
# body runs while counter < 10.
cmpl $10, -8(%rbp)
jge .LBB1_4
# %bb.2: # %for.body
# in Loop: Header=BB1_1 Depth=1
# std::cout << counter, via std::ostream::operator<<(int).
movl -8(%rbp), %esi
movabsq $_ZSt4cout, %rdi
callq _ZNSolsEi
# Apply std::endl to the returned stream by passing its address to the
# operator<< overload that takes a stream-manipulator function pointer.
movq %rax, %rdi
movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
callq _ZNSolsEPFRSoS_E
# %bb.3: # %for.inc
# in Loop: Header=BB1_1 Depth=1
# counter += 1, then re-test the condition.
movl -8(%rbp), %eax
addl $1, %eax
movl %eax, -8(%rbp)
jmp .LBB1_1
.LBB1_4: # %for.end
# return 0 and tear down the frame.
xorl %eax, %eax
addq $16, %rsp
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end1:
.size main, .Lfunc_end1-main
.cfi_endproc
# -- End function
.section .text.startup,"ax",@progbits
.p2align 4, 0x90 # -- Begin function _GLOBAL__sub_I_test.cpp
.type _GLOBAL__sub_I_test.cpp,@function
# Static-initialization routine for test.cpp in the unoptimized build: sets up a
# frame and does nothing but call __cxx_global_var_init.
_GLOBAL__sub_I_test.cpp: # @_GLOBAL__sub_I_test.cpp
.cfi_startproc
# %bb.0: # %entry
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset %rbp, -16
movq %rsp, %rbp
.cfi_def_cfa_register %rbp
callq __cxx_global_var_init
popq %rbp
.cfi_def_cfa %rsp, 8
retq
.Lfunc_end2:
.size _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
.cfi_endproc
# -- End function
.type _ZStL8__ioinit,@object # @_ZStL8__ioinit
.local _ZStL8__ioinit
.comm _ZStL8__ioinit,1,1
.hidden __dso_handle
.section .init_array,"aw",@init_array
.p2align 3
.quad _GLOBAL__sub_I_test.cpp
.ident "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
.section ".note.GNU-stack","",@progbits
.addrsig
.addrsig_sym __cxx_global_var_init
.addrsig_sym __cxa_atexit
.addrsig_sym _ZNSolsEi
.addrsig_sym _ZNSolsEPFRSoS_E
.addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
.addrsig_sym _GLOBAL__sub_I_test.cpp
.addrsig_sym _ZStL8__ioinit
.addrsig_sym __dso_handle
.addrsig_sym _ZSt4cout