EDIT: I edited both the question and its title to be more precise.
Considering the following source code:
#include <vector>
struct xyz {
xyz() { } // empty constructor, but the compiler doesn't care
xyz(const xyz& o): v(o.v) { }
xyz& operator=(const xyz& o) { v=o.v; return *this; }
int v; // <will be initialized to int(), which means 0
};
std::vector<xyz> test() {
return std::vector<xyz>(1024); // will do a memset() :-(
}
...how can I avoid the memory allocated by the vector<> to be initialized with copies of its first element, which is a O(n) operation I'd rather skip for the sake of speed, since my default constructor does nothing ?
A g++ specific solution will do, if no generic one exists (but I couldn't find any attribute to do that).
EDIT: generated code follows (command line: arm-elf-g++-4.5 -O3 -S -fno-verbose-asm -o - test.cpp | arm-elf-c++filt | grep -vE '^[[:space:]]+[.@].*$' )
test():
mov r3, #0
stmfd sp!, {r4, lr}
mov r4, r0
str r3, [r0, #0]
str r3, [r0, #4]
str r3, [r0, #8]
mov r0, #4096
bl operator new(unsigned long)
add r1, r0, #4096
add r2, r0, #4080
str r0, [r4, #0]
stmib r4, {r0, r1}
add r2, r2, #12
b .L4 @
.L8: @
add r0, r0, #4 @
.L4: @
cmp r0, #0 @ fill the memory
movne r3, #0 @
strne r3, [r0, #0] @
cmp r0, r2 @
bne .L8 @
str r1, [r4, #4]
mov r0, r4
ldmfd sp!, {r4, pc}
EDIT: For the sake of completeness, here is the assembly for x86_64:
.globl test()
test():
LFB450:
pushq %rbp
LCFI0:
movq %rsp, %rbp
LCFI1:
pushq %rbx
LCFI2:
movq %rdi, %rbx
subq $8, %rsp
LCFI3:
movq $0, (%rdi)
movq $0, 8(%rdi)
movq $0, 16(%rdi)
movl $4096, %edi
call operator new(unsigned long)
leaq 4096(%rax), %rcx
movq %rax, (%rbx)
movq %rax, 8(%rbx)
leaq 4092(%rax), %rdx
movq %rcx, 16(%rbx)
jmp L4 @
L8: @
addq $4, %rax @
L4: @
testq %rax, %rax @ memory-filling loop
je L2 @
movl $0, (%rax) @
L2: @
cmpq %rdx, %rax @
jne L8 @
movq %rcx, 8(%rbx)
movq %rbx, %rax
addq $8, %rsp
popq %rbx
leave
LCFI4:
ret
LFE450:
EH_frame1:
LSCIE1:
LECIE1:
LSFDE1:
LASFDE1:
LEFDE1:
EDIT: I think the conclusion is to not use std::vector<>
when you want to avoid unneeded initialization. I ended up unrolling my own templated container, which performs better (and has specialized versions for neon and armv7).