I'm trying to figure out what the best optimization (maybe AVX?) is for this code:
/* Two-component, single-precision vector. */
typedef struct {
    float x, y;
} vector;

/*
 * Component-wise sum of two vectors.
 *
 * Both operands are taken by value and a new vector is returned by value:
 * result.x = u.x + v.x, result.y = u.y + v.y.
 */
vector add(vector u, vector v)
{
    vector sum;

    sum.x = u.x + v.x;
    sum.y = u.y + v.y;
    return sum;
}
Running gcc -S code.c
gives quite long assembly code:
# Compiler-generated listing for add (GAS, AT&T syntax: op src, dst).
#
# Summary of what the instructions below do:
#   - the pointer received in %rdi is kept in a stack slot, two float sums
#     are written through it, and the same pointer is returned in %rax;
#   - the four float operands are read from the caller's side of the frame
#     at 16/32/48/64(%rbp).
# NOTE(review): the operands sit 16 bytes apart and the results are stored at
# offsets 0 and 16, which does not look like a struct of two adjacent 4-byte
# floats -- presumably this listing was produced from a differently declared
# type; confirm against the C source that actually generated it.
.file "code.c"
.text
.globl add
.type add, @function
add:
.LFB0:
.cfi_startproc
# Frame setup: save the caller's %rbp and make %rbp the frame base.
# The .cfi_* directives only describe the frame for unwinders/debuggers.
pushq %rbp
.cfi_def_cfa_offset 16
.cfi_offset 6, -16
movq %rsp, %rbp
.cfi_def_cfa_register 6
# Keep the incoming pointer in a slot just below the frame base; %rsp is not
# adjusted and this function makes no calls.
movq %rdi, -8(%rbp)
# First sum: xmm1 = [rbp+16] + [rbp+48]
movss 16(%rbp), %xmm1
movss 48(%rbp), %xmm0
addss %xmm0, %xmm1
# Second sum: xmm0 = [rbp+64] + [rbp+32]
movss 32(%rbp), %xmm2
movss 64(%rbp), %xmm0
addss %xmm2, %xmm0
# Write both sums through the saved pointer; the pointer is reloaded from the
# stack slot before every use.
movq -8(%rbp), %rax
movss %xmm1, (%rax) # first sum  -> offset 0
movq -8(%rbp), %rax
movss %xmm0, 16(%rax) # second sum -> offset 16
# Return value: the pointer that came in through %rdi.
movq -8(%rbp), %rax
popq %rbp
.cfi_def_cfa 7, 8
ret
.cfi_endproc
.LFE0:
.size add, .-add
.ident "GCC: (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609"
.section .note.GNU-stack,"",@progbits
However, I expected very few instructions for such a simple task. Could someone help me optimize this kind of code while keeping the float types?
Thanks.