I am writing an application that does the computation in C++ then returns the multi dimensional results as a numpy array using pybind11. From the documentation of pybind and from the examples seen online, the numpy array creation is basically passing the pointer of the array of data and enclosing details on the strides. In the C++ part however, I am not keen on using a one dimensional array
and using some fancy indexing, but I would rather use structs. That got me thinking whether (homogeneous) variables placed in contiguous memory could be treated as part of an array
.
My train of thought was the following. The elements of an array
are placed in contiguous memory. The elements of a struct
are also placed contiguously in the order of their declaration (when padding is not involved). So the following four variable declarations are the same from the point of view of memory placement, e.g. if I were to point a pointer at the first element then I could iterate through all the elements by taking one integer's worth of steps at a time:
// One matrix row stored as a single array member of four ints.
struct struct_array
{
    // Value-initialised, so every element starts at zero.
    int elem[4]{};
};
// One matrix row stored as four separately named int members,
// declared in the order a, b, c, d.
struct struct_ints
{
    // Each member is value-initialised, so it starts at zero.
    int a{};
    int b{};
    int c{};
    int d{};
};
// Four alternative declarations of zero-initialised storage for a
// 3x4 integer matrix (3 rows, 4 ints per row).
int one_dim_array[3 * 4]{};
int two_dim_array[3][4]{};
struct_array array_of_struct_arrays[3]{};
struct_ints array_of_struct_ints[3]{};
Here is my test code, which suggests that the answer to my question is yes. It prints some addresses, then sets and reads elements.
#include <iostream>
// One matrix row stored as a single array member of four ints.
struct struct_array
{
    // Value-initialised, so every element starts at zero.
    int elem[4]{};
};
// One matrix row stored as four separately named int members,
// declared in the order a, b, c, d.
struct struct_ints
{
    // Each member is value-initialised, so it starts at zero.
    int a{};
    int b{};
    int c{};
    int d{};
};
int main(void)
{
const int rows = 3;
const int cols = 4;
int one_dim_array[rows * cols] = {};
int two_dim_array[rows][cols] = {};
struct_array array_of_struct_arrays[rows] = {};
struct_ints array_of_struct_ints[rows] = {};
std::cout << sizeof(int) << " is the size of an int in bytes\n";
std::cout << "\nOne dim array\n";
for (int i = 0; i < 12; ++i)
{
one_dim_array[i] = i;
std::cout << &one_dim_array[i] << "\n";
}
std::cout << "\nTwo dim array\n";
for (int i = 0; i < rows; ++i)
{
for (int j = 0; j < cols; ++j)
{
two_dim_array[i][j] = i * cols + j;
std::cout << &two_dim_array[i][j] << "\n";
}
}
std::cout << "\nArray of struct arrays\n";
for (int i = 0; i < rows; ++i)
{
for (int j = 0; j < cols; ++j)
{
array_of_struct_arrays[i].elem[j] = i * cols + j;
std::cout << &array_of_struct_arrays[i] << " " << &array_of_struct_arrays[i].elem[j] << "\n";
}
}
std::cout << "\nArray of struct ints\n";
for (int i = 0; i < rows; ++i)
{
array_of_struct_ints[i].a = i * cols + 0;
array_of_struct_ints[i].b = i * cols + 1;
array_of_struct_ints[i].c = i * cols + 2;
array_of_struct_ints[i].d = i * cols + 3;
std::cout << &array_of_struct_ints[i] << " " << &array_of_struct_ints[i].a << "\n";
std::cout << &array_of_struct_ints[i] << " " << &array_of_struct_ints[i].b << "\n";
std::cout << &array_of_struct_ints[i] << " " << &array_of_struct_ints[i].c << "\n";
std::cout << &array_of_struct_ints[i] << " " << &array_of_struct_ints[i].d << "\n";
}
for (int i = 0; i < 4; ++i)
{
// Maybe using a reinterpret_cast would be more modern
void *void_p = nullptr;
switch (i)
{
case 0:
void_p = &one_dim_array;
std::cout << "\nOne dim array\n";
break;
case 1:
void_p = &two_dim_array;
std::cout << "\nTwo dim array\n";
break;
case 2:
void_p = &array_of_struct_arrays;
std::cout << "\nArray of struct arrays\n";
break;
case 3:
void_p = &array_of_struct_ints;
std::cout << "\nArray of struct ints\n";
}
int *int_p = (int *)void_p;
for (int i = 0; i < 12; ++i)
{
std::cout << *(int_p + i) << "\n";
}
}
std::cout << "Hello world!";
return 0;
}
Is this right, or am I missing something? What are your thoughts on this matter? (Apart from that I should switch to std::array.) Thank you for your time!