I am fiddling around with an implementation of a generic dynamic array. The array should hold information about its size, how many entries are used, and then hold the actual data. The meta-information (size/used) is generic, but the data needs to handle different types, so I am handling that with macros. I am trying, however, to get the memory allocation code into functions. So my thought is: I have a struct for the meta-information
// Bookkeeping header stored at the start of every dynamic array.
struct da_meta {
size_t size; // number of elements the allocation has room for
size_t used; // number of elements currently in use
};
and then I have a macro that creates a struct per type, using a flexible array following the meta information:
// Expands to an untagged struct type for a dynamic array of TYPE:
// the shared da_meta header followed by a flexible array member that
// holds the elements. Every use of the macro spells out a separate
// struct declaration.
#define dynarray(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
I can declare an integer array, for example, as
// Example: pointer to a dynamic array of int, initialised to a null pointer.
dynarray(int) *int_array = 0;
To allocate and reallocate arrays, my thought was now to use code such as this:
// Nonzero when meta_size + obj_size * len cannot be represented in a
// size_t. obj_size is used as a divisor and must therefore be nonzero.
// Every parameter is parenthesised so that expression arguments such as
// `a + b` expand with the intended precedence.
#define size_overflow(meta_size, obj_size, len) \
    ((SIZE_MAX - (meta_size)) / (obj_size) < (len))
// Always free if we cannot reallocate
void *realloc_dynarray_mem(void *p,
size_t meta_size,
size_t obj_size,
size_t new_len)
{
if (size_overflow(meta_size, obj_size, new_len))
goto abort;
struct da_meta *new_da =
realloc(p, meta_size + obj_size * new_len);
if (!new_da) goto abort;
new_da->size = new_len;
new_da->used = MIN(new_da->used, new_len);
return new_da;
abort:
free(p);
return 0;
}
The function gets the size of the struct sans the data objects, the size of individual objects, and the number of objects to allocate memory for. I don't use the size of the struct da_meta
type, because it might be too small depending on the alignment of the data objects, but I will get it from sizeof
applied to the concrete (typed) structures. The function will always free the input and return NULL if I cannot allocate because in my application I have to give up if I cannot grow the array, so I don't try to preserve the old data in case there is an error.
There is nothing wrong with this code, as far as I can tell. I can always allocate memory, and as long as I have more than the size of struct da_meta
, I can set the variables there. But when I return the result and use it as a dynarray(T)
type, I am less sure. I think it should work, because C should place the first member of a struct at the very start of the struct's memory, and that is where I put struct da_meta
, but am I right here?
I create a new array like this:
// Allocate the memory for a brand-new dynamic array with room for `len`
// elements of `obj_size` bytes each, after a header of `meta_size` bytes.
// Returns the block with size == len and used == 0, or 0 when the
// allocation fails.
void *new_dynarray_mem(size_t meta_size,
                       size_t obj_size,
                       size_t len)
{
    struct da_meta *fresh =
        realloc_dynarray_mem(0, meta_size, obj_size, len);
    if (!fresh)
        return fresh;

    // The allocator already stores the size; it is set again here
    // together with `used`, which a fresh block does not have yet.
    fresh->size = len;
    fresh->used = 0;
    return fresh;
}
// Allocate a new dynamic array of `type` with room for init_size elements.
// The header size is taken from the concrete dynarray(type) struct and the
// element size from the element type itself.
#define new_da(type, init_size) \
    new_dynarray_mem(sizeof(dynarray(type)), sizeof(type), (init_size))
Here, the macro new_da()
gets the size of the header/meta information from sizeof(dynarray(type))
and the size of the underlying types from sizeof(type)
. The second value is fine, but I am also uncertain about the first. Does the C standard guarantee that if I create two different structs with exactly the same code, e.g., calling dynarray(int)
twice, that I get the same memory layout? I cannot imagine a compiler that would give me a different layout for the same code, but when it comes to imagining what compilers get up to, I am quite limited.
For appending to the array, I think all is fine. There I do not generate new types but get the size from the existing dynamic array, so if the first allocation is standard compliant, then I think the appending is as well, but I could be wrong.
// Release the memory of the dynamic array `da` and reset the pointer to
// null. `da` must be an assignable pointer expression.
#define da_free(da) \
    do { \
        free(da); \
        da = 0; \
    } while (0)
// Next capacity for a dynamic array that currently holds `size` slots:
// an empty array grows to 1, otherwise the capacity doubles. Evaluates
// to 0 when doubling would not fit in a size_t.
#define grow(size) \
    ((size) != 0 \
        ? ((size) <= SIZE_MAX / 2 \
            ? (size) * 2 /* room to double */ \
            : 0)         /* doubling would overflow */ \
        : 1)             /* first allocation */
// Append a value to the dynamic array `da`, doubling the capacity first
// when every slot is in use.
//
// `da` must be a non-null, assignable pointer to a dynarray struct; it is
// evaluated several times, so it must be free of side effects. The
// variadic part is the value to store (variadic so that compound literals
// containing commas can be passed).
//
// If the capacity cannot grow or the reallocation fails, the array has
// been freed, `da` is null and nothing is stored; callers should test
// `da` after every append.
#define da_append(da, ...) \
    do { \
        if ((da)->meta.used == (da)->meta.size) { \
            size_t new_size = grow((da)->meta.size); \
            if (new_size == 0) { da_free(da); break; } \
            /* the third argument is the element SIZE, not an element */ \
            (da) = realloc_dynarray_mem( \
                (da), sizeof *(da), sizeof *(da)->data, new_size \
            ); \
            if (!(da)) break; \
        } \
        (da)->data[(da)->meta.used++] = __VA_ARGS__; \
    } while (0)
Am I guaranteed that if I lay out the concrete dynamic arrays with the meta-information at the top of the structs, then I can treat the allocated memory as both a pointer to the meta-information and the array? Is it safe to assume that I get the same size and memory layout if I generate the same struct twice? I feel that it must be that way since it shouldn't differ from including the same header file twice, but since I am generating the code there might be something that I am missing.
EDIT Based on the comments, I have updated the code to that below, but I have left the original code (of course) so the comments make sense in terms of that.
// Element i of the dynamic array `da`; usable as an lvalue. No bounds
// check is performed.
#define da_at(da, i) ((da)->data[(i)])
// Number of entries currently in use in the dynamic array `da`.
#define da_len(da) ((da)->meta.used)
// Bookkeeping header stored at the start of every dynamic array.
struct da_meta {
size_t size; // number of elements the allocation has room for
size_t used; // number of elements currently in use
};
// Expands to an untagged struct type for a dynamic array of TYPE:
// the shared da_meta header followed by a flexible array member that
// holds the elements. Every use of the macro spells out a separate
// struct declaration.
#define dynarr(TYPE) \
struct { \
struct da_meta meta; \
TYPE data[]; \
}
// Always free if we cannot reallocate
void *realloc_dynarray_mem(struct da_meta *p,
size_t meta_size,
size_t obj_size,
size_t new_len)
{
// Size size overflow?
if (((SIZE_MAX - meta_size) / obj_size < new_len))
goto fail;
struct da_meta *new_da =
realloc(p, meta_size + obj_size * new_len);
if (!new_da) goto fail;
new_da->size = new_len;
new_da->used = MIN(new_da->used, new_len);
return new_da;
fail:
free(p);
return 0;
}
// Allocate the memory for a brand-new dynamic array with room for `len`
// elements of `obj_size` bytes each, after a header of `meta_size` bytes.
// Returns the block with `used` reset to 0, or 0 when allocation fails.
void *new_dynarray_mem(size_t meta_size, size_t obj_size, size_t len)
{
    struct da_meta *fresh =
        realloc_dynarray_mem(0, meta_size, obj_size, len);
    if (!fresh)
        return 0;

    // A new array starts out empty.
    fresh->used = 0;
    return fresh;
}
// Double the capacity of a dynamic array; a capacity of 0 grows to 1.
//
//   p          the array's memory block (its header is read for the size)
//   meta_size  size in bytes of the concrete struct without its elements
//   obj_size   size in bytes of one element
//
// Returns the resized block. Returns 0 when p is null, and frees p and
// returns 0 when the new element count would overflow a size_t or the
// reallocation fails.
void *grow_dynarray_mem(struct da_meta *p,
                        size_t meta_size,
                        size_t obj_size)
{
    if (!p)
        return 0;

    // Grow by the current capacity, or by one slot for an empty array.
    const size_t adding = p->size ? p->size : 1;

    // Can the element count itself be doubled without wrapping?
    if (p->size > SIZE_MAX - adding) {
        free(p);
        return 0;
    }

    // realloc_dynarray_mem performs the byte-size overflow check and
    // frees p itself when it fails.
    return realloc_dynarray_mem(
        p, meta_size, obj_size, p->size + adding
    );
}
// Allocate a new dynamic array for the pointer `da` with room for
// init_size elements. `da` is only inspected by sizeof (never evaluated),
// so it may be the very variable being initialised.
#define new_da(da, init_size) \
    new_dynarray_mem(sizeof(*(da)), sizeof((da)->data[0]), (init_size))
// Release the memory of the dynamic array `da` and reset the pointer to
// null. `da` must be an assignable pointer expression.
#define da_free(da) \
    do { \
        free(da); \
        da = 0; \
    } while (0)
// Append a value to the dynamic array `da`, growing the allocation first
// when every slot is in use.
//
// `da` must be a non-null, assignable pointer to a dynarr struct; it is
// evaluated several times, so it must be free of side effects. It is
// parenthesised throughout so that expressions such as `*pp` work. The
// variadic part is the value to store (variadic so that compound literals
// containing commas can be passed).
//
// If growing fails, `da` is assigned the null result of
// grow_dynarray_mem and nothing is stored; callers should test `da`
// after every append.
#define da_append(da, ...) \
    do { \
        if ((da)->meta.used == (da)->meta.size) { \
            (da) = grow_dynarray_mem( \
                (struct da_meta *)(da), \
                sizeof *(da), sizeof *(da)->data \
            ); \
            if (!(da)) break; \
        } \
        (da)->data[(da)->meta.used++] = __VA_ARGS__; \
    } while (0)
When used, the code can look like this:
// Demo: create an empty int array, append 0..4, print the contents.
// Returns 0 on success and 1 if any allocation fails.
int main(void)
{
    dynarr(int) *int_array = new_da(int_array, 0);
    if (!int_array) goto error;

    printf("%zu out of %zu\n",
           int_array->meta.used,
           int_array->meta.size);

    for (int i = 0; i < 5; i++) {
        da_append(int_array, i);
        // da_append leaves a null pointer behind when growing failed.
        if (!int_array) goto error;
    }

    // The length is a size_t, so index with size_t to avoid a
    // signed/unsigned comparison.
    for (size_t i = 0; i < da_len(int_array); i++) {
        printf("%d ", da_at(int_array, i));
    }
    printf("\n");

    da_free(int_array);
    return 0;

error:
    return 1;
}