I tested this on Compiler Explorer and found that the copy is actually performed, even at -O3
optimization with GCC 10.3.
Here is my test code:
#include <iostream>
#include <vector>
using std::cout;
/// Plain aggregate holding 32 ints. It is deliberately large so that any
/// copy of it shows up clearly in the generated code.
struct MyStruct {
    int a[32];
};

/// Writes every element of `m.a` to `s`, each one followed by a single space.
///
/// @param s  Destination stream.
/// @param m  Structure to print; taken by const reference, so the call
///           itself does not copy it.
/// @return   `s`, to allow chaining.
std::ostream& operator<<(std::ostream& s, const MyStruct& m) {
    // Range-for takes its bound from the array type, so the element count is
    // not repeated here and cannot drift from the declaration of `a`.
    for (const int value : m.a) s << value << ' ';
    return s;
}
// Global container iterated by test(). It is never filled in this snippet;
// only the code generated for the loop is of interest.
std::vector<MyStruct> myStructList;
// Prints every element of myStructList to std::cout.
// The loop variable is declared BY VALUE on purpose: each iteration
// copy-initializes `oneStruct` (32 ints) from the current element, and this
// is the copy the experiment is meant to expose.
void test(void) {
for(MyStruct oneStruct : myStructList)
{
// operator<< takes a const reference, so this call adds no further copy.
cout << oneStruct;
}
}
Here is part of the generated assembly:
# test() compiled with the loop variable taken by value (MyStruct oneStruct).
test():
# Prologue: save the registers used below (restored before ret).
pushq %r13
pushq %r12
pushq %rbp
pushq %rbx
# Reserve 152 bytes of stack: 16(%rsp)..143(%rsp) receives the local copy of
# one MyStruct, and 15(%rsp) holds the ' ' character that is printed.
subq $152, %rsp
# Load the first two 8-byte fields of myStructList; they are used below as
# the current-element pointer (%r12) and the end pointer (%r13).
movq myStructList(%rip), %r12
movq myStructList+8(%rip), %r13
# Empty range: skip the loop entirely.
cmpq %r13, %r12
je .L8
# %rbp = one past the end of the local copy (16 + 128 = 144).
leaq 144(%rsp), %rbp
# Outer loop: one iteration per vector element.
.L11:
# --- Copy of the element into the local `oneStruct` ---
# Eight 16-byte loads from the element at (%r12) and eight 16-byte stores to
# 16(%rsp)..143(%rsp): 8 * 16 = 128 bytes = 32 ints.
movdqu (%r12), %xmm0
movdqu 16(%r12), %xmm1
# %rbx = address of the first int of the local copy (inner-loop cursor).
leaq 16(%rsp), %rbx
movdqu 32(%r12), %xmm2
movdqu 48(%r12), %xmm3
movdqu 64(%r12), %xmm4
movdqu 80(%r12), %xmm5
movups %xmm0, 16(%rsp)
movdqu 96(%r12), %xmm6
movdqu 112(%r12), %xmm7
movups %xmm1, 32(%rsp)
movups %xmm2, 48(%rsp)
movups %xmm3, 64(%rsp)
movups %xmm4, 80(%rsp)
movups %xmm5, 96(%rsp)
movups %xmm6, 112(%rsp)
movups %xmm7, 128(%rsp)
# Inner loop: body of operator<<, printing one int and one space per pass.
.L10:
# Load the next int FROM THE STACK COPY and pass it with &std::cout.
movl (%rbx), %esi
movl $_ZSt4cout, %edi
# Advance the cursor by one int (4 bytes).
addq $4, %rbx
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# Print ' ' (ASCII 32): store it at 15(%rsp) and insert 1 character into the
# stream returned by the previous call (%rax).
movl $1, %edx
leaq 15(%rsp), %rsi
movb $32, 15(%rsp)
movq %rax, %rdi
call std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
# Repeat until the cursor reaches the end of the local copy.
cmpq %rbp, %rbx
jne .L10
# Advance to the next vector element: subtracting -128 adds 128 bytes.
subq $-128, %r12
cmpq %r12, %r13
jne .L11
# Epilogue: release the stack frame and restore the saved registers.
.L8:
addq $152, %rsp
popq %rbx
popq %rbp
popq %r12
popq %r13
ret
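The lines between .L10: and jne .L11 correspond to the operator<< function, and you can see
that a large copy is performed before it: the movdqu/movups instructions move the whole
128-byte structure onto the stack on every iteration.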
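You should add & between MyStruct and oneStruct in the for loop (i.e. write
for(MyStruct& oneStruct : myStructList)) to make it a reference and avoid the unwanted copies.
Since operator<< only needs a const reference, const MyStruct& works as well.
Here is part of the generated assembly with & added: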
# test() compiled with the loop variable taken by reference (MyStruct& oneStruct).
test():
# Prologue: save the registers used below (restored before ret).
pushq %r12
pushq %rbp
pushq %rbx
# Only 16 bytes of stack are reserved: there is no room for, and no copy of,
# a MyStruct. 15(%rsp) holds the ' ' character that is printed.
subq $16, %rsp
# Load the first two 8-byte fields of myStructList; they are used below as
# the begin pointer (%rbp) and the end pointer (%r12).
movq myStructList(%rip), %rbp
movq myStructList+8(%rip), %r12
# Empty range: skip the loop entirely.
cmpq %r12, %rbp
je .L8
# %rbp += 128: it now points one past the end of the current element.
subq $-128, %rbp
# Outer loop: one iteration per vector element.
.L11:
# %rbx = start of the current element, inside the vector's own storage.
leaq -128(%rbp), %rbx
# Inner loop: body of operator<<, printing one int and one space per pass.
.L10:
# Load the next int DIRECTLY FROM THE VECTOR ELEMENT and pass it with &std::cout.
movl (%rbx), %esi
movl $_ZSt4cout, %edi
# Advance the cursor by one int (4 bytes).
addq $4, %rbx
call std::basic_ostream<char, std::char_traits<char> >::operator<<(int)
# Print ' ' (ASCII 32): store it at 15(%rsp) and insert 1 character into the
# stream returned by the previous call (%rax).
movl $1, %edx
leaq 15(%rsp), %rsi
movb $32, 15(%rsp)
movq %rax, %rdi
call std::basic_ostream<char, std::char_traits<char> >& std::__ostream_insert<char, std::char_traits<char> >(std::basic_ostream<char, std::char_traits<char> >&, char const*, long)
# Repeat until the cursor reaches the end of the current element.
cmpq %rbp, %rbx
jne .L10
# %rax = end of the NEXT element; stop if the current element's end is the
# vector's end pointer, otherwise move on to the next element.
leaq 128(%rbp), %rax
cmpq %rbp, %r12
je .L8
movq %rax, %rbp
jmp .L11
# Epilogue: release the stack frame and restore the saved registers.
.L8:
addq $16, %rsp
popq %rbx
popq %rbp
popq %r12
ret
Now you can see that the large copy has been eliminated and that the pointer to the structure
inside the vector is used directly when executing operator<<.