2

I've successfully implemented BVH as described in PBRT. This one although has a slightly huge issue - the traversal looks through ALL nodes that intersect the ray, which is wrong (in terms of performance).

So I ended up optimizing the ray traversal, currently I use the version from Aila & Laine implementation of their "Understanding the efficiency of ray traveral on GPU". First, here is the code:

INLINE bool BVH::Traverse(TriangleWoop* prims, Ray* ray, IntersectResult* result)
{
unsigned int todo[32];
unsigned int todoOffset = 0;
unsigned int nodeNum = 0;

bool hit = false;
IntersectResult tmp = IntersectResult();
*(int*)&tmp.data.w = -1;
float tmin = 2e30f;

float4 origin = ray->origin;
float4 direction = ray->direction;
float4 invdir = rcp(direction);

float tmpx = 0.0f, tmpy = 0.0f;

while(true)
{
    while(this->nodes[nodeNum].prim_count == 0)
    {
        tmpx += 0.01f;
        tmpy += 0.001f;

        float4 c0v1 = (this->nodes[nodeNum + 1].bounds.minPt - origin) * invdir;
        float4 c0v2 = (this->nodes[nodeNum + 1].bounds.maxPt - origin) * invdir;
        float4 c1v1 = (this->nodes[this->nodes[nodeNum].above_child].bounds.minPt - origin) * invdir;
        float4 c1v2 = (this->nodes[this->nodes[nodeNum].above_child].bounds.maxPt - origin) * invdir;
        float4 c0n = f4min(c0v1, c0v2);
        float4 c0f = f4max(c0v1, c0v2);
        float4 c1n = f4min(c1v1, c1v2);
        float4 c1f = f4max(c1v1, c1v2);

        float n0 = max(c0n.x, max(c0n.y, c0n.z));
        float f0 = min(c0f.x, min(c0f.y, c0f.z));
        float n1 = max(c1n.x, max(c1n.y, c1n.z));
        float f1 = min(c1f.x, min(c1f.y, c1f.z));

        bool child0 = (f0 > 0.0f) && (n0 < f0);
        bool child1 = (f1 > 0.0f) && (n1 < f1);

        child0 &= (n0 < tmin);
        child1 &= (n1 < tmin);

        unsigned int nodeAddr = this->nodes[nodeNum].above_child;
        nodeNum = nodeNum + 1;

        if(child0 != child1)
        {
            if(child1)
            {
                nodeNum = nodeAddr;
            }
        }
        else
        {
            if(!child0)
            {
                if(todoOffset == 0)
                {
                    goto result;
                }

                nodeNum = todo[--todoOffset];
            }
            else
            {
                if(n1 < n0)
                {
                    swap(nodeNum, nodeAddr);
                }

                todo[todoOffset++] = nodeAddr;
            }
        }
    }

    if(this->nodes[nodeNum].prim_count > 0)
    {
        for(unsigned int i = this->nodes[nodeNum].prim_offset; i < this->nodes[nodeNum].prim_offset + this->nodes[nodeNum].prim_count; i++)
        {
            const TriangleWoop* tri = &prims[this->indexes[i]];

            if(IntersectRayTriangleWoop(ray, tri, &tmp))
            {
                if(tmp.data.z > 0.0f && tmp.data.z < result->data.z)
                {
                    tmin = tmp.data.z;
                    result->data.z = tmp.data.z;
                    result->data.x = tmp.data.x;
                    result->data.y = tmp.data.y;
                    *(int*)&result->data.w = this->indexes[i];
                    hit = true;
                }
            }
        }
    }

    if(todoOffset == 0)
    {
        goto result;
    }

    nodeNum = todo[--todoOffset];
}

result:
result->data.x = tmpx;
result->data.y = tmpy;

return hit;
}

Technically it's just a standard while-while stack ray-bvh traversal. Now to the main problem, look at next image (viewing sponza from outside), in color you can see how much nodes in BVH has been visited (full red = 100, full yellow = 1100): Sponza BVH "heat" map from outside

Next image shows similar situation inside: Sponza BVH "heat" map from inside

As you can see this is kind of a problem - it just has to traverse much more nodes than it's supposed to. Can someone see something wrong with my code? Any advice is welcomed as I'm stucked with this for few days already and can't think off some solution.

Olivier Moindrot
  • 27,908
  • 11
  • 92
  • 91
ZarakiKenpachi
  • 447
  • 3
  • 14

0 Answers0