8

I am seeing a problem with operator new[]:

#include <stdlib.h>
#include <stdio.h>

class V4 { public:
    float v[ 4 ];
    V4() {}
    void *operator new( size_t sz ) { return aligned_alloc( 16, sz ); } 
    void *operator new[]( size_t sz ) { printf( "sz: %zu\n", sz ); return aligned_alloc( 16, sz ); }
    void operator delete( void *p, size_t sz ) { free( p ); }
  //void operator delete[]( void *p, size_t sz ) { free( p ); }
};

class W4 { public:
    float w[ 4 ];
    W4() {}
    void *operator new( size_t sz ) { return aligned_alloc( 16, sz ); } 
    void *operator new[]( size_t sz ) { printf( "sz: %zu\n", sz ); return aligned_alloc( 16, sz ); }
    void operator delete( void *p, size_t sz ) { free( p ); }
    void operator delete[]( void *p, size_t sz ) { free( p ); }
};

int main( int argc, char **argv ) { 

    printf( "sizeof( V4 ): %zu\n", sizeof( V4 ));
    V4 *p = new V4[ 1 ];
    printf( "p: %p\n", p );

    printf( "sizeof( W4 ): %zu\n", sizeof( W4 ));
    W4 *q = new W4[ 1 ];
    printf( "q: %p\n", q );

    exit(0);
}

Produces:

$ g++ -Wall main.cpp && ./a.out
sizeof( V4 ): 16
sz: 16
p: 0x55be98a10030
sizeof( W4 ): 16
sz: 24
q: 0x55be98a10058

Why does the alloc size increase to 24 when I include the operator delete[]? This is screwing up my aligned malloc.

$ g++ --version
g++ (Debian 7.2.0-18) 7.2.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

From looking at other questions, it seems as though the extra 8 bytes may be being used to store the array size. Even if this is expected behaviour, why is it triggered by operator delete[], and what is the correct procedure for allocating memory-aligned arrays?

EDIT Thanks, the linked questions appear to be relevant. I still think the question as asked needs an answer, however. It ought to be possible to change the example code to produce memory-aligned arrays without recourse to std::vector, in my opinion. My current thinking is that it will be necessary to allocate a yet-larger block of bytes which are 16-byte aligned, and return the pointer such that the initial 8 bytes bring the rest of the block to alignment on the 16-byte boundary. The delete[] operator would then have to perform the reverse operation before calling free(). This is pretty disgusting, but I think it is required to satisfy both the calling code (C runtime?) (which requires its 8 bytes for size storage) - and the use case which is to get 16-byte aligned Vector4s.

EDIT The linked answer is certainly relevant, but it does not address the problem of ensuring correct memory alignment.

EDIT It looks like this code will do what I want, but I don't like the magic number 8 in delete[]:

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

class W16 { public:

    float w[ 16 ];
    W16() {}
    void *operator new( size_t sz ) { return aligned_alloc( 16, sz ); } 
    void *operator new[]( size_t sz ) {
        size_t r = sz % sizeof( W16 );
        size_t ofs = sizeof( W16 ) - r;
        size_t _sz = sz + ofs;
        void *p1 = aligned_alloc( sizeof( W16 ), _sz );
        void *p2 = ((uint8_t *) p1) + ofs;
        printf( "sizeof( W16 ): %zx, sz: %zx, r: %zx, ofs: %zx, _sz: %zx\np1: %p\np2: %p\n\n", sizeof( W16 ), sz, r, ofs, _sz, p1, p2 );
        return p2;
    }
    void operator delete( void *p, size_t sz ) { free( p ); }
    void operator delete[]( void *p, size_t sz ) {
        void *p1 = ((int8_t*) p) + 8 - sizeof( W16 );
        printf( "\np2: %p\np1: %p", p, p1 );
        free( p1 );
    }
};

int main( int argc, char **argv ) {

    printf( "sizeof( W16 ): %zx\n", sizeof( W16 ));
    W16 *q = new W16[ 16 ];
    printf( "&q[0]: %p\n", &q[0] );
    delete[] q;
}

Output:

$ g++ -Wall main.cpp && ./a.out 
sizeof( W16 ): 40
sizeof( W16 ): 40, sz: 408, r: 8, ofs: 38, _sz: 440
p1: 0x559876c68080
p2: 0x559876c680b8

&q[0]: 0x559876c680c0

p2: 0x559876c680b8
p1: 0x559876c68080

EDIT Title changed from feedback in comments. I don't think this is a 'duplicate' of the linked answer anymore, though I don't know if I can get it removed.

PatB
  • 415
  • 1
  • 5
  • 13
  • 1
    Unrelated to your problem, but the result of the `sizeof` operator is of type `size_t` which can't be used with the `"%d"` format specifier. Use e.g. `"%zu"` instead. – Some programmer dude Jan 03 '18 at 13:58
  • Thanks - perhaps g++ could do with a compiler warning on this. – PatB Jan 03 '18 at 13:59
  • 3
    *"perhaps g++ could do with a compiler warning on this"* https://wandbox.org/permlink/TkhBFwqvhSCLgY60 You have to actually enable the warnings. – Baum mit Augen Jan 03 '18 at 14:01
  • By the way, this example is boiled down from a Vector4f class which was working (in terms of aligned malloc) when compiled for windows. This problem has cropped up in the course of converting the code to run on linux. – PatB Jan 03 '18 at 14:02
  • 5
    Related: https://stackoverflow.com/questions/45781692/parameter-size-of-member-operator-new-in-c/45782370#45782370 – Holt Jan 03 '18 at 14:03
  • 4
    _perhaps g++ could do with a compiler warning on this_ don't use printf, use cout. –  Jan 03 '18 at 14:09
  • 8
    If you are defining `new[]` but not `delete[]`, the compiler probably assumes you will never call the standard `delete[]`, because it would be UB to do so. So it doesn't add the 8 bytes it needs for bookkeeping. You probably don't want to use `new[]` anyway. Use `std::vector` with your own allocator instead. If you insist on `new[]`, round up the amount of memory you allocate so that it is divisible by the alignment, and don't use the initial portion. If you are asked to allocate e.g. `x=16n+8` bytes, allocate `p=aligned_alloc(x+8)` and return `p+8`; in `operator delete[]` call `free(p-8)`. – n. m. could be an AI Jan 03 '18 at 14:24
  • 1
    There's also an overaligned `operator new` in C++17, you may want to use it if your compiler supports it. – n. m. could be an AI Jan 03 '18 at 14:26
  • _"The linked answer is certainly relevant, but it does not address the problem of ensuring correct memory alignment."_ It answers the question in your title and the 1st question in the body. Herein lies the problem of asking 'bonus' questions later in the text. – underscore_d Jan 03 '18 at 15:52
  • Can't you just use `alignas(16)` instead of overriding new/delete? – Useless Jan 03 '18 at 16:03

1 Answers1

2

It looks as though this will do for me:

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

inline void *array_alloc( size_t sz_obj, size_t sz_req ) {
    size_t r = sz_req % sz_obj;
    size_t ofs = sz_obj - r;
    size_t sz = sz_req + ofs;
    void *p1 = aligned_alloc( sz_obj, sz );
    void *p2 = (void*) (((uintptr_t ) p1) + ofs);
  //printf( "sz_obj: %zx, sz_req: %zx, r: %zx, ofs: %zx, sz: %zx\np1: %p\np2: %p\n\n", sz_obj, sz_req, r, ofs, sz, p1, p2 );
    return p2;
}

inline void array_free( size_t sz_obj, void *p2 ) {
    void *p1 = (void*) (((uint8_t*)p2) - (((uintptr_t)p2) % sz_obj));
  //printf( "\np2: %p\np1: %p", p2, p1 );
    free( p1 );
}

class W16 { public:

    float w[ 16 ];
    W16() {}
    void *operator new( size_t sz ) { return aligned_alloc( 16, sz ); } 
    void *operator new[]( size_t sz ) { return array_alloc( sizeof( W16 ), sz ); }
    void operator delete( void *p, size_t sz ) { free( p ); }
    void operator delete[]( void *p, size_t sz ) { array_free( sizeof( W16 ), p ); }
};

int main( int argc, char **argv ) {
  //printf( "sizeof( W16 ): %zx\n", sizeof( W16 ));
    W16 *q = new W16[ 16 ];
    printf( "&q[0]: %p\n", &q[0] );
    delete[] q;
}

EDIT Thanks to n.m., this code works without a magic number.

PatB
  • 415
  • 1
  • 5
  • 13
  • 1
    If `aligned_alloc` returns an address that is a multiple of `sz_obj`, then what you pass to `free` must also be a multiple of `sz_obj`. So your magic number is `((uintptr_t)p2) % sz_obj`. – n. m. could be an AI Jan 03 '18 at 20:34
  • I think you mean 'p2 - (((uint8_t)p2)%sz_obj)', which is equivalent to the expression in array_free(). You can turn on the printfs in the code to verify it. – PatB Jan 03 '18 at 20:43
  • 1
    no, not `(uint8_t)p2`! `((uintptr_t)p2) % sz_obj` is the magic number. `(uint8_t)*p2 - ((uintptr_t)p2) % sz_obj` is the pointer you pass to `free`. – n. m. could be an AI Jan 03 '18 at 20:53