1

I have noticed very strange behavior when adding the -O2 optimization flag while compiling a simple program.

#include <iostream>

// Minimal reproduction: the loop runs i from 0 up to (but not including) 10.
int main() {
    for (int i = 0; i < 10; i++) {
        // Stream the counter, then std::endl (newline followed by a flush).
        std::cout << i << std::endl;
    }
    return 0;
}

program output:

1
2
3
4
5
6
7
...
1228881
...

The program works as expected with the -O3 or -Ofast flags.

output of running clang++ -O2 -v test.cpp:

vm-hw06{mrussell}125: clang++ -O2 -v test.cpp
clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /comp/15/bin
Found candidate GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Selected GCC installation: /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0
Candidate multilib: .;@m64
Selected multilib: .;@m64
 "/h/mrussell/clang/bin/clang-10" -cc1 -triple x86_64-unknown-linux-gnu -emit-obj -disable-free -main-file-name test.cpp -mrelocation-model static -mthread-model posix -mframe-pointer=none -fmath-errno -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -v -resource-dir /h/mrussell/clang/lib/clang/10.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0 -internal-isystem /h/mrussell/gcc-10.2/lib/gcc/x86_64-pc-linux-gnu/11.0.0/../../../../include/c++/11.0.0/x86_64-pc-linux-gnu -internal-isystem /h/mrussell

A curious point is that removing `<< std::endl` makes the problem go away. Likewise, replacing the std::cout line with a printf call works as expected. Update: removing the `<< i` but keeping the `<< std::endl` also works - 10 blank lines are printed without fail! I am flummoxed here; any help would be greatly appreciated!

PS: Note that I built clang++ based on this doc: https://btorpey.github.io/blog/2015/01/02/building-clang/ - have been using it to compile basic programs with no such issues for >1 year.

Update: output of clang++ -O2 -S test.cpp

        .text
        .file   "test.cpp"
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
        # main() as emitted at -O2 (AT&T operand order: source, destination).
        # Register roles visible in this listing:
        #   %r14d = loop counter i (zeroed on entry, passed to operator<<, incremented)
        #   %rbx  = stream pointer returned by _ZNSolsEi, i.e. std::ostream::operator<<(int)
        #   %rbp  = pointer loaded from the stream object and null-checked; going by the
        #           block labels this is the ctype<char> facet
        # NOTE(review): the loop-exit compare below tests against -101, whereas the C++
        # source bound is 10; that mismatch fits the runaway output reported.
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        # Save the three callee-saved registers this function uses.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        pushq   %r14
        .cfi_def_cfa_offset 24
        pushq   %rbx
        .cfi_def_cfa_offset 32
        .cfi_offset %rbx, -32
        .cfi_offset %r14, -24
        .cfi_offset %rbp, -16
        # i = 0, then enter the loop body directly; the exit test sits at the bottom.
        xorl    %r14d, %r14d
        jmp     .LBB0_1
        .p2align        4, 0x90
.LBB0_4:                                # %if.end.i
                                        #   in Loop: Header=BB0_1 Depth=1
        # Slow path, taken when the byte at 56(%rbp) is zero: call _M_widen_init on
        # the facet, then make an indirect call through slot +48 of the table that
        # the facet's first quadword points to, passing character 10 ('\n').
        # Presumably this is the virtual do_widen - confirm against libstdc++.
        movq    %rbp, %rdi
        callq   _ZNKSt5ctypeIcE13_M_widen_initEv
        movq    (%rbp), %rax
        movq    %rbp, %rdi
        movl    $10, %esi
        callq   *48(%rax)
.LBB0_5:                                # %_ZNKSt5ctypeIcE5widenEc.exit
                                        #   in Loop: Header=BB0_1 Depth=1
        # %al holds the widened newline; sign-extend it, put() it on the stream held
        # in %rbx, then flush() the stream that put() returned.
        movsbl  %al, %esi
        movq    %rbx, %rdi
        callq   _ZNSo3putEc
        movq    %rax, %rdi
        callq   _ZNSo5flushEv
        # ++i, then leave the loop only if i == -101.
        # NOTE(review): the source condition is `i < 10`; no compare against 10
        # appears anywhere in this function.
        addl    $1, %r14d
        cmpl    $-101, %r14d
        je      .LBB0_6
.LBB0_1:                                # %for.body
                                        # =>This Inner Loop Header: Depth=1
        # operator<<(int) on std::cout with the current counter; the address of
        # _ZSt4cout is materialised as a 32-bit absolute immediate.
        movl    $_ZSt4cout, %edi
        movl    %r14d, %esi
        callq   _ZNSolsEi
        movq    %rax, %rbx
        # Load the returned object's first quadword, read the quadword 24 bytes
        # before the address it holds, and use that as an offset from %rbx to fetch
        # the pointer at +240. Presumably vtable -> virtual-base offset -> facet
        # pointer inside the ios sub-object - TODO confirm against libstdc++ layout.
        movq    (%rax), %rax
        movq    -24(%rax), %rax
        movq    240(%rbx,%rax), %rbp
        # A null pointer here goes to the __throw_bad_cast call at .LBB0_7.
        testq   %rbp, %rbp
        je      .LBB0_7
# %bb.2:                                # %_ZSt13__check_facetISt5ctypeIcEERKT_PS3_.exit
                                        #   in Loop: Header=BB0_1 Depth=1
        # Flag byte at offset 56 of the facet: zero selects the slow path above.
        cmpb    $0, 56(%rbp)
        je      .LBB0_4
# %bb.3:                                # %if.then.i
                                        #   in Loop: Header=BB0_1 Depth=1
        # Fast path: read one byte at offset 67 of the facet (presumably the cached
        # widened '\n', i.e. table entry 10 of a table starting at offset 57).
        movzbl  67(%rbp), %eax
        jmp     .LBB0_5
.LBB0_6:                                # %for.cond.cleanup
        # return 0: restore the saved registers in reverse push order.
        xorl    %eax, %eax
        popq    %rbx
        .cfi_def_cfa_offset 24
        popq    %r14
        .cfi_def_cfa_offset 16
        popq    %rbp
        .cfi_def_cfa_offset 8
        retq
.LBB0_7:                                # %if.then.i10
        .cfi_def_cfa_offset 32
        # Error path for a null facet pointer; no instruction follows the call.
        callq   _ZSt16__throw_bad_castv
.Lfunc_end0:
        .size   main, .Lfunc_end0-main
        .cfi_endproc
                                        # -- End function
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function _GLOBAL__sub_I_test.cpp
        .type   _GLOBAL__sub_I_test.cpp,@function
        # Static initializer for test.cpp (-O2 build): constructs the ios_base::Init
        # object _ZStL8__ioinit and registers its destructor via __cxa_atexit.
_GLOBAL__sub_I_test.cpp:                # @_GLOBAL__sub_I_test.cpp
        .cfi_startproc
# %bb.0:                                # %entry
        # The pushed %rax value is not used by anything below; presumably the push
        # only exists to keep %rsp 16-byte aligned at the call.
        pushq   %rax
        .cfi_def_cfa_offset 16
        # std::ios_base::Init::Init() on _ZStL8__ioinit.
        movl    $_ZStL8__ioinit, %edi
        callq   _ZNSt8ios_base4InitC1Ev
        # Arguments for __cxa_atexit: destructor address, object address, __dso_handle.
        movl    $_ZNSt8ios_base4InitD1Ev, %edi
        movl    $_ZStL8__ioinit, %esi
        movl    $__dso_handle, %edx
        # Undo the push, then jump (not call) so __cxa_atexit returns to our caller.
        popq    %rax
        .cfi_def_cfa_offset 8
        jmp     __cxa_atexit            # TAILCALL
.Lfunc_end1:
        .size   _GLOBAL__sub_I_test.cpp, .Lfunc_end1-_GLOBAL__sub_I_test.cpp
        .cfi_endproc
                                        # -- End function
        # _ZStL8__ioinit: file-local common symbol, 1 byte, alignment 1.
        .type   _ZStL8__ioinit,@object  # @_ZStL8__ioinit
        .local  _ZStL8__ioinit
        .comm   _ZStL8__ioinit,1,1
        .hidden __dso_handle
        # Place the initializer's address in .init_array, 8-byte aligned.
        .section        .init_array,"aw",@init_array
        .p2align        3
        .quad   _GLOBAL__sub_I_test.cpp

        .ident  "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym _GLOBAL__sub_I_test.cpp
        .addrsig_sym _ZStL8__ioinit
        .addrsig_sym __dso_handle
        .addrsig_sym _ZSt4cout

And, for reference, output of clang++ -S test.cpp

        .text
        .file   "test.cpp"
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function __cxx_global_var_init
        .type   __cxx_global_var_init,@function
        # Unoptimized build: constructs the ios_base::Init object _ZStL8__ioinit and
        # registers its destructor with __cxa_atexit. Called from
        # _GLOBAL__sub_I_test.cpp later in this listing.
__cxx_global_var_init:                  # @__cxx_global_var_init
        .cfi_startproc
# %bb.0:                                # %entry
        # Frame-pointer prologue.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        # std::ios_base::Init::Init() on _ZStL8__ioinit (64-bit absolute address).
        movabsq $_ZStL8__ioinit, %rdi
        callq   _ZNSt8ios_base4InitC1Ev
        # __cxa_atexit(destructor, object, __dso_handle); the destructor address is
        # staged through %rax before being moved into %rdi.
        movabsq $_ZNSt8ios_base4InitD1Ev, %rax
        movq    %rax, %rdi
        movabsq $_ZStL8__ioinit, %rsi
        movabsq $__dso_handle, %rdx
        callq   __cxa_atexit
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end0:
        .size   __cxx_global_var_init, .Lfunc_end0-__cxx_global_var_init
        .cfi_endproc
                                        # -- End function
        .text
        .globl  main                    # -- Begin function main
        .p2align        4, 0x90
        .type   main,@function
        # main() from the unoptimized build. The loop counter lives in the stack
        # slot -8(%rbp) and is reloaded on every use. The exit test compares it
        # against 10, matching the C++ source bound.
main:                                   # @main
        .cfi_startproc
# %bb.0:                                # %entry
        # Frame-pointer prologue with 16 bytes reserved for locals.
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        subq    $16, %rsp
        # -4(%rbp) is zeroed and never read again in this listing (presumably the
        # return-value slot); -8(%rbp) is i, initialised to 0.
        movl    $0, -4(%rbp)
        movl    $0, -8(%rbp)
.LBB1_1:                                # %for.cond
                                        # =>This Inner Loop Header: Depth=1
        # Leave the loop once i >= 10 (signed compare).
        cmpl    $10, -8(%rbp)
        jge     .LBB1_4
# %bb.2:                                # %for.body
                                        #   in Loop: Header=BB1_1 Depth=1
        # std::ostream::operator<<(int) on std::cout with i.
        movl    -8(%rbp), %esi
        movabsq $_ZSt4cout, %rdi
        callq   _ZNSolsEi
        # Apply std::endl through operator<<(ostream& (*)(ostream&)) on the returned
        # stream; here endl is an out-of-line function whose address is passed.
        movq    %rax, %rdi
        movabsq $_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_, %rsi
        callq   _ZNSolsEPFRSoS_E
# %bb.3:                                # %for.inc
                                        #   in Loop: Header=BB1_1 Depth=1
        # i = i + 1 via load / add / store, then back to the condition.
        movl    -8(%rbp), %eax
        addl    $1, %eax
        movl    %eax, -8(%rbp)
        jmp     .LBB1_1
.LBB1_4:                                # %for.end
        # return 0 and tear down the frame.
        xorl    %eax, %eax
        addq    $16, %rsp
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end1:
        .size   main, .Lfunc_end1-main
        .cfi_endproc
                                        # -- End function
        .section        .text.startup,"ax",@progbits
        .p2align        4, 0x90         # -- Begin function _GLOBAL__sub_I_test.cpp
        .type   _GLOBAL__sub_I_test.cpp,@function
        # Static initializer for test.cpp (unoptimized build): a thin wrapper that
        # only calls __cxx_global_var_init.
_GLOBAL__sub_I_test.cpp:                # @_GLOBAL__sub_I_test.cpp
        .cfi_startproc
# %bb.0:                                # %entry
        pushq   %rbp
        .cfi_def_cfa_offset 16
        .cfi_offset %rbp, -16
        movq    %rsp, %rbp
        .cfi_def_cfa_register %rbp
        callq   __cxx_global_var_init
        popq    %rbp
        .cfi_def_cfa %rsp, 8
        retq
.Lfunc_end2:
        .size   _GLOBAL__sub_I_test.cpp, .Lfunc_end2-_GLOBAL__sub_I_test.cpp
        .cfi_endproc
                                        # -- End function
        # _ZStL8__ioinit: file-local common symbol, 1 byte, alignment 1.
        .type   _ZStL8__ioinit,@object  # @_ZStL8__ioinit
        .local  _ZStL8__ioinit
        .comm   _ZStL8__ioinit,1,1
        .hidden __dso_handle
        # Place the initializer's address in .init_array, 8-byte aligned.
        .section        .init_array,"aw",@init_array
        .p2align        3
        .quad   _GLOBAL__sub_I_test.cpp

        .ident  "clang version 10.0.0 (https://github.com/llvm-mirror/clang aa231e4be75ac4759c236b755c57876f76e3cf05) (https://github.com/llvm-mirror/llvm 2c4ca6832fa6b306ee6a7010bfb80a3f2596f824)"
        .section        ".note.GNU-stack","",@progbits
        .addrsig
        .addrsig_sym __cxx_global_var_init
        .addrsig_sym __cxa_atexit
        .addrsig_sym _ZNSolsEi
        .addrsig_sym _ZNSolsEPFRSoS_E
        .addrsig_sym _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_
        .addrsig_sym _GLOBAL__sub_I_test.cpp
        .addrsig_sym _ZStL8__ioinit
        .addrsig_sym __dso_handle
        .addrsig_sym _ZSt4cout
optimus_prime
  • 1,039
  • 2
  • 10
  • 21
  • 1
    Output seems fine [here](https://godbolt.org/z/GM5soGezG) using the same clang version... – ChrisMM Apr 23 '22 at 01:22
  • And yet, it fails on our system! – optimus_prime Apr 23 '22 at 01:24
  • Best thing you can do to see what is going on is peep the assembly. That being said, have you updated the compiler recently? If yes, that may be the problem. If no, that may be the problem. – Taekahn Apr 23 '22 at 01:38
  • Have posted assembly for reference. – optimus_prime Apr 23 '22 at 02:01
  • 1
    Try `\n` instead of `std::endl`? That would isolate the problem better. – Spencer Apr 23 '22 at 02:04
  • `\n` is successful. – optimus_prime Apr 23 '22 at 02:08
  • don't use `g++ -S` or `clang++ -S`. Try https://godbolt.org/ instead or if you want to do similar things to them see [How to remove "noise" from GCC/clang assembly output?](https://stackoverflow.com/q/38552116/995714) – phuclv Apr 23 '22 at 02:09
  • Also note that `std::cout << std::endl;` is successful. – optimus_prime Apr 23 '22 at 02:09
  • I blame Megatron. – user4581301 Apr 23 '22 at 02:24
  • 2
    The instructions say to check out from the LLVM trunk, which is a very bad idea: it's entirely possible that you happened to check out at a time when the trunk was buggy. You should only ever use tagged versions for production use. I'd guess that if you check out a recent tag and rebuild, your problem will go away. – Alan Birtles Apr 23 '22 at 07:02
  • Thanks @Alan Birtles - great suggestion. Will do. – optimus_prime Apr 23 '22 at 14:45
  • 1
    So the bug is/was in the buffer flush - but only manifested itself when there was something other than the `\n` in the buffer. Another good reason to avoid `endl`. – Spencer Apr 23 '22 at 15:19
  • @AlanBirtles alas, a fresh build of clang-14.0.1 produces the same result. – optimus_prime Apr 25 '22 at 19:39
  • 2
    My next guess then would be an incompatibility with the version of libstdc++ in use. You might be better off using gcc/libstdc++ from devtoolset rather than building your own, you can also install clang from there: https://developers.redhat.com/blog/2017/11/01/getting-started-llvm-toolset#installing_llvm_toolset – Alan Birtles Apr 25 '22 at 19:51
  • Problem solved after a fresh rebuild of both gcc and clang -- @AlanBirtles, if you'd like to amalgamate some variant of your comments into an answer, I'll gladly accept. (BTW, I didn't install from devtoolset because I don't have admin privileges on the server.) Thanks for the help! – optimus_prime Apr 25 '22 at 23:09

1 Answer

0

For anyone showing up to this thread looking for an answer - @AlanBirtles gave me some great tips. First was that the original build was from the trunk, rather than a tagged branch, and thus might be buggy. However, after rebuilding clang++ 12, 13, and 14 from source, no luck. As he mentioned later, however, there may be an issue with the compatibility of the libstdc++ in use. After a fresh build of gcc and clang from the new gcc, all is right with the universe. In some sense this is an unsatisfactory answer, as I'm not sure of the nature of the incompatibility. But, if someone stumbles across this thread with the same problem, you know what to do.

optimus_prime
  • 1,039
  • 2
  • 10
  • 21