I am doing some work with sdfs, evaluating them both on the GPU and the CPU. Some of my sdfs are analytic, some are grid based. explanation on what an sdf is.
A grid based sdf is just a 3D array with floating point values on each cell.
I implemented dual contouring to create meshes out of these sdfs. But I have a huge optimization issue. At the same DC resolution, an analytic sdf takes a minute and a half, a grid sdf takes hours.
The algorithm is exactly the same,only the Sdf evalaution function is different so the bottleneck is evaluating the grid sdf model (I verified this with a profiler).
I am trying to figure out how to reduce computation time for the following:
struct GridSdfMetaData
{
float step_size;
uint x_resolution;
uint y_resolution;
uint z_resolution;
float x_origin;
float y_origin;
float z_origin;
};
inline float BilinearInterpolation(
const std::array<float, 4> &values,
const std::array<float, 2> &weights)
{
const float m1 = std::lerp(values[0], values[1], weights[0]);
const float m2 = std::lerp(values[2], values[3], weights[0]);
return std::lerp(m1, m2, weights[1]);
}
inline float TrilinearInterpolation(
const std::array<float, 8> &values,
const std::array<float, 3> &weights)
{
const float m1 = BilinearInterpolation({values[0], values[1], values[2], values[3]}, {weights[0], weights[1]});
const float m2 = BilinearInterpolation({values[4], values[5], values[6], values[7]}, {weights[0], weights[1]});
return std::lerp(m1, m2, weights[2]);
}
float SampleGridData(
const Eigen::Vector3f &point,
const GridSdfMetaData& grid_meta_data,
const std::vector<float>& grid_data)
{
using vec3 = Eigen::Vector3f;
using ivec3 = Eigen::Vector3i;
const vec3 relative_p =
point - vec3(grid_meta_data.x_origin, grid_meta_data.y_origin, grid_meta_data.z_origin);
const vec3 normalized_p = vec3(
relative_p.x() / (grid_meta_data.step_size),
relative_p.y() / (grid_meta_data.step_size),
relative_p.z() / (grid_meta_data.step_size));
ivec3 discrete_p = {normalized_p.x(), normalized_p.y(), normalized_p.z()};
discrete_p.x() = clamp(int(discrete_p.x()), 0, int(grid_meta_data.x_resolution - 1));
discrete_p.y() = clamp(int(discrete_p.y()), 0, int(grid_meta_data.y_resolution - 1));
discrete_p.z() = clamp(int(discrete_p.z()), 0, int(grid_meta_data.z_resolution - 1));
float v000 = ((float*)(grid_data.data()))[ //
discrete_p.z() * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
discrete_p.y() * grid_meta_data.x_resolution +
discrete_p.x()];
float v001 = ((float*)(grid_data.data()))[ //
discrete_p.z() * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
discrete_p.y() * grid_meta_data.x_resolution +
std::min(uint(discrete_p.x()) + 1, grid_meta_data.x_resolution - 1)];
float v010 = ((float*)(grid_data.data()))[ //
discrete_p.z() * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
std::min(uint(discrete_p.y()) + 1, grid_meta_data.y_resolution - 1) * grid_meta_data.x_resolution +
discrete_p.x()];
float v011 = ((float*)(grid_data.data()))[ //
discrete_p.z() * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
std::min(uint(discrete_p.y()) + 1, grid_meta_data.y_resolution - 1) * grid_meta_data.x_resolution +
std::min(uint(discrete_p.x()) + 1, grid_meta_data.x_resolution - 1)];
float v100 = ((float*)(grid_data.data()))[ //
std::min(uint(discrete_p.z()) + 1, grid_meta_data.z_resolution - 1) * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
discrete_p.y() * grid_meta_data.x_resolution +
discrete_p.x()];
float v101 = ((float*)(grid_data.data()))[ //
std::min(uint(discrete_p.z()) + 1, grid_meta_data.z_resolution - 1) * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
discrete_p.y() * grid_meta_data.x_resolution +
std::min(uint(discrete_p.x()) + 1, grid_meta_data.x_resolution - 1)];
float v110 = ((float*)(grid_data.data()))[ //
std::min(uint(discrete_p.z()) + 1, grid_meta_data.z_resolution - 1) * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
std::min(uint(discrete_p.y()) + 1, grid_meta_data.y_resolution - 1) * grid_meta_data.x_resolution +
discrete_p.x()];
float v111 = ((float*)(grid_data.data()))[ //
std::min(uint(discrete_p.z()) + 1, grid_meta_data.z_resolution - 1) * grid_meta_data.y_resolution * grid_meta_data.x_resolution +
std::min(uint(discrete_p.y()) + 1, grid_meta_data.y_resolution - 1) * grid_meta_data.x_resolution +
std::min(uint(discrete_p.x()) + 1, grid_meta_data.x_resolution - 1)];
float dx = normalized_p.x() - floor(normalized_p.x());
float dy = normalized_p.y() - floor(normalized_p.y());
float dz = normalized_p.z() - floor(normalized_p.z());
return TrilinearInterpolation({v000, v001, v010, v011, v100, v101, v110, v111}, {dx, dy, dz});
}
Since this is what is consuming the vast majority of the DC time for grid sdfs (about 90%)