I am not sure about the performance, but here is one option.
You create a templated struct MD<N,M,L>
that takes array dimensions N,M,L
and has a static function slice
.
slice
takes a flat input range and one Slice
instance per dimension and returns a corresponding multidimensional range over the elements of the flat input range.
The Slice
instances are just structs containing a start index and an optional end index.
You can use deep_flatten
from this SO answer to prevent having to use nested for loops over the multidimensional range. Note that the returned range is just an input_range
, which does not have a rich interface.
#include <vector>
#include <ranges>
#include <cassert>
#include <iostream>
template <size_t dim>
struct Slice {
// default constructor leaves start at zero and end empty. Correspondes to the whole dimension
constexpr Slice() = default;
// Create a slice with a single index
constexpr Slice(size_t i) : begin(i), end(i+1) {
assert( (0 <= i) && (i < dim));
}
// Create a slice with a start and an end index
constexpr Slice(size_t s, size_t e) : begin(s), end(e+1) {
assert( (0 <= s) && (s <= e) && (e < dim) );
}
size_t begin {0};
size_t end {dim};
};
// An adaptor object to interpret a flat range as a multidimensional array
template <size_t dim, size_t... dims>
struct MD {
constexpr static auto dimensions = std::make_tuple(dim, dims...);
consteval static size_t size(){
if constexpr (sizeof...(dims) > 0) {
return dim*(dims * ...);
}
else {
return dim;
}
}
// returns a multidimensional range over the elements in the flat array
template <typename Rng>
constexpr static auto slice(
Rng&& range,
Slice<dim> const& slice,
Slice<dims> const&... slices
)
{
return slice_impl(range, 0, slice, slices...);
}
template <typename Rng>
constexpr static auto slice_impl(
Rng&& range,
size_t flat_index,
Slice<dim> const& slice,
Slice<dims> const&... slices
)
{
if constexpr (std::ranges::sized_range<Rng>) { assert(std::size(range) >= size()); }
static_assert(sizeof...(slices) == sizeof...(dims), "wrong number of slice arguments.");
if constexpr (sizeof...(slices) == 0)
{
// end recursion at inner most range
return range | std::views::drop(flat_index*dim + slice.begin) | std::views::take(slice.end - slice.begin);
}
else
{
// for every index to be kept in this dimension, recurse to the next dimension and increment the flat_index
return std::views::iota(slice.begin, slice.end) | std::views::transform(
[&range, flat_index, slices...](size_t i){
return MD<dims...>::slice_impl(range, flat_index*dim + i, slices...);
}
);
}
}
// convenience function for the full view
template <typename Rng>
constexpr static auto as_range(Rng&& range){
return slice(range, Slice<dim>{}, Slice<dims>{}...);
}
};
// recursively join a range of ranges
// https://stackoverflow.com/questions/63249315/use-of-auto-before-deduction-of-auto-with-recursive-concept-based-fun
template <typename Rng>
auto flat(Rng&& rng) {
using namespace std::ranges;
auto joined = rng | views::join;
if constexpr (range<range_value_t<decltype(joined)>>) {
return flat(joined);
} else {
return joined;
}
}
int main()
{
static_assert(MD<2,3,2>::size() == 12);
static_assert(std::get<0>(MD<2,3,2>::dimensions) == 2);
static_assert(std::get<1>(MD<2,3,2>::dimensions) == 3);
static_assert(std::get<0>(MD<2,3,2>::dimensions) == 2);
std::vector v = {1,2,3,4,5,6,7,8,9,10,11,12};
// obtain the full view of the data, interpreted as a 2x3x2 array
auto full = MD<2,3,2>::as_range(v);
// print the full view
std::cout << "data interpreted as 2x3x2 array:\n";
for (size_t i=0; i < full.size(); i++) {
std::cout << "index " << i << ":\n";
for (auto const& d3 : full[i]) {
for (auto const& val : d3) {
std::cout << val << " ";
}
std::cout << "\n";
}
}
std::cout << "\n";
auto sliced = MD<2,3,2>::slice(
v,
{}, // 1st dim: take all elements along this dim
{1,2}, // 2nd dim: take indices 1:2
{0} // 3rd dim: take only index 0
);
std::cout << "2x2x1 Slice with indices {{}, {1,2}, {0}} of the 2x3x2 data:\n";
for(size_t i=0; i < 2; ++i){ // index-based loop
for (size_t j=0; j < 2; ++j){
std::cout << sliced[i][j][0] << " ";
}
std::cout << "\n";
}
std::cout << "\n";
for(auto& val : flat(sliced)){
val *= val;
}
// print the whole flat data
std::cout << "\nThe whole data, after squaring all elements in sliced view:\n";
for (auto const& val : v){
std::cout << val << " ";
}
}
Output:
data interpreted as 2x3x2 array:
index 0:
1 2
3 4
5 6
index 1:
7 8
9 10
11 12
2x2x1 Slice with indices {{}, {1,2}, {0}} of the 2x3x2 data:
3 5
9 11
The whole data, after squaring all elements in sliced view:
1 2 9 4 25 6 7 8 81 10 121 12
Live Demo on godbolt compiler explorer
This is a prototype. I am sure the ergonomics can be improved.
Edit
A first quick and dirty benchmark of assigning a 6x6x6 view with another 6x6x6 view out of a 10x10x10:
Quickbench
A nested for loop over the multidimensional range is about 3 times slower than the traditional nested for-loop. Flattening the view using deep_flatten
/std::views::join
seems to make it 20-30 times slower. Apparently the compiler is having a hard time optimizing here.