I'm using GCC 12.2 on Linux. I build with `-nostdlib`,
and the compiler complained about the lack of `memcpy` and `memmove`. So I implemented a crude `memcpy` in assembly and made `memmove` call `abort`, since I always want `memcpy` to be used.
I was wondering whether I could stop the compiler from asking for `memcpy` (and `memmove`) by implementing my own copy routine in C. The optimizer seems to recognize what the loop really is and emits a call to the library function anyway. However, once I had implemented it (using `#define memcpy mymemcpy`
) and ran the program, my app aborted: it called my `memmove` implementation instead of the assembly `memcpy`. Why is GCC calling `memmove` instead of `memcpy`?
Clang calls `memcpy`, but GCC optimizes my code better, so I use GCC for optimized builds.
// Byte-wise forward copy of `size` bytes from `src` to `dest`.
//
// dest: destination buffer; only written, never read (access(write_only, 1)).
// src:  source buffer; only read.
// size: number of bytes to copy. Zero or negative values copy nothing.
//
// Both pointers must be non-null (nonnull(1, 2)) and, because they are
// __restrict__-qualified, the caller promises the two regions do not overlap.
//
// `static inline` rather than plain `inline`: under C99/C11 rules a plain
// `inline` definition provides no external definition, so a call that is not
// inlined fails to link. `static inline` behaves the same in C and C++.
__attribute__ ((access(write_only, 1))) __attribute__((nonnull(1, 2)))
static inline void mymemcpy(void *__restrict__ dest, const void *__restrict__ src, int size)
{
    const unsigned char *s = (const unsigned char*)src;
    unsigned char *d = (unsigned char*)dest;
    // Counting up with `i < size` makes a negative size a no-op. The previous
    // `while(size--)` never reached zero for negative input and ran past both
    // buffers until the counter overflowed.
    for (int i = 0; i < size; ++i) {
        d[i] = s[i];
    }
}
Reproducible example:
//dummy.cpp
// Link-time placeholders for the libc symbols that main.cpp (and the code the
// compiler generates for it) references. The build uses -nostdlib, so these
// names would otherwise be unresolved.
//
// extern "C" keeps the names unmangled so they match the C symbols the callers
// expect. Every stub ignores its arguments and returns 0; the parameter lists
// and return types deliberately do not match the real prototypes, which is why
// they live in a separate translation unit that never includes the real headers.
// NOTE(review): with malloc() returning 0 the resulting binary is only useful
// for inspecting the generated calls, not for actually running.
extern "C" {
void*malloc() { return 0; }
int read() { return 0; }
int write() { return 0; }
// The two symbols the question is about: only their presence in the
// disassembly matters here.
int memcpy() { return 0; }
int memmove() { return 0; }
}
//main.cpp
#include <unistd.h>
#include <cstdlib>
// Minimal buffer descriptor used by the reproducer.
struct MyVector {
    void *p;              // start of the backing storage
    long long position;   // current fill offset; checked against `length` before copying
    long long length;     // upper bound that `position` is checked against
};
// Byte-wise forward copy of `size` bytes from `src` to `dest`.
//
// dest: destination buffer; only written, never read (access(write_only, 1)).
// src:  source buffer; only read.
// size: number of bytes to copy. Zero or negative values copy nothing.
//
// Both pointers must be non-null (nonnull(1, 2)) and, because they are
// __restrict__-qualified, the caller promises the two regions do not overlap.
__attribute__ ((access(write_only, 1))) __attribute__((nonnull(1, 2)))
void mymemcpy(void *__restrict__ dest, const void *__restrict__ src, int size)
{
    const unsigned char *s = (const unsigned char*)src;
    unsigned char *d = (unsigned char*)dest;
    // Counting up with `i < size` makes a negative size a no-op. The previous
    // `while(size--)` never reached zero for negative input and ran past both
    // buffers until the counter overflowed.
    for (int i = 0; i < size; ++i) {
        d[i] = s[i];
    }
}
//__attribute__ ((noinline))
// Copies the first 5 bytes of `file_from_disk` to the start of `v`'s storage,
// but only when `v->position + 5` does not exceed `v->length`.
// Always returns 0; `v->position` is left unchanged.
int func(const char *file_from_disk, MyVector *v)
{
    const long long end_after_copy = v->position + 5;
    if (end_after_copy > v->length) {
        return 0;   // not enough room: skip the copy
    }
    mymemcpy(v->p, file_from_disk, 5);
    return 0;
}
// Scratch buffer filled by the second read() and handed to func() as the copy source.
char buf[4096];

// Entry point of the -nostdlib reproducer: allocates a 1024-byte MyVector,
// reads into it from fd 0, reads a second chunk into `buf`, lets func() copy
// 5 bytes of `buf` into the vector, and writes `position` bytes to fd 1.
// NOTE(review): a real freestanding _start has no caller to return to and would
// normally finish with an exit system call; none is available in this repro.
extern "C"
int _start() {
    MyVector v{malloc(1024),0,1024};
    // Leave 5 bytes of headroom so func()'s `position + 5 <= length` check can pass.
    v.position += read(0, v.p, 1024-5);
    const int len = read(0, buf, 4096);
    (void)len;  // byte count is intentionally unused in this reduced example
    func(buf, &v);
    write(1, v.p, v.position);
    // Flowing off the end of a non-void function is undefined behaviour in C++
    // (only main gets an implicit return), so return explicitly.
    return 0;
}
g++ -march=native -nostdlib -static -fno-exceptions -fno-rtti -O2 main.cpp dummy.cpp
Check the emitted calls with `objdump -D a.out | grep call`:
401040: e8 db 00 00 00 call 401120 <memmove>
40108d: e8 4e 00 00 00 call 4010e0 <malloc>
4010a3: e8 48 00 00 00 call 4010f0 <read>
4010ba: e8 31 00 00 00 call 4010f0 <read>
4010c5: e8 56 ff ff ff call 401020 <_Z4funcPKcP8MyVector>
4010d5: e8 26 00 00 00 call 401100 <write>
402023: ff 11 call *(%rcx)