-1

I appear to be using glMultiDrawElementsIndirect (MDI) and/or DrawElementsIndirectCommand (DEIC) incorrectly, as I am not able to render all objects correctly. The method that attempts to reuse draw commands for similar objects and textures ('instancing'...ish) draws all objects in all locations. The 'debug' method simply uses one draw command per object (two triangles), but the output always contains too few objects, and the first object's location is never used for any object.

This is what happens when using the failed attempt at instancing: enter image description here

This is what happens when using the 'debug' method of one object (two triangles) per DEIC: debug render method

The goal is to use the instanceCount field of the DEIC correctly, to allow for something approaching instancing while drawing the correct number of objects in the correct locations. My adventures in google-raiding have suggested that the baseInstance field of the DEIC could be used as the drawID if the DEICs are stored in a buffer. If this is impossible, or if I am vastly misunderstanding how these are meant to be used, please call me out and let me know! I have tried to include the smallest amount of applicable code to avoid a 10,000-word post.

Below, I am creating "draw path" objects that are a collection of buffer IDs and vectors to be loaded into buffers (based on numerous variables unrelated to this question).

// Creates the GL objects for one draw path `p`: a VAO, vertex and index buffers,
// a per-instance draw-ID attribute, and two shader storage buffers
// (binding 0 = transforms, binding 1 = texture addresses).
// NOTE(review): three attribute-setup styles are mixed here (DSA glEnableVertexArrayAttrib,
// separate-format glVertexAttribFormat/glVertexAttribBinding, and glVertexAttribIPointer);
// the non-DSA calls act on whichever VAO is currently bound, i.e. the one bound just below.

// VAO: generated and bound so the bound-state calls below are recorded into it
glGenVertexArrays(1, &p->vertexArrayObject);
glBindVertexArray(p->vertexArrayObject);

// vertices: attribute 0 = Vertex::position (3 floats), attribute 1 = Vertex::uv (2 floats),
// both fetched from the vertex buffer attached at `bindingIndex`
glCreateBuffers(1, &p->vertexBuffer);
glBindVertexBuffer(bindingIndex, p->vertexBuffer, 0, sizeof(Vertex));
glEnableVertexArrayAttrib(p->vertexArrayObject, 0);
glEnableVertexArrayAttrib(p->vertexArrayObject, 1);
glVertexAttribFormat(0, 3, GL_FLOAT, GL_FALSE, offsetof(Vertex, position));
glVertexAttribBinding(0, bindingIndex);
// NOTE(review): `normalized` is GL_TRUE for a GL_FLOAT attribute — presumably has no effect on float data; confirm intent
glVertexAttribFormat(1, 2, GL_FLOAT, GL_TRUE, offsetof(Vertex, uv));
glVertexAttribBinding(1, bindingIndex);

// FAST path only: allocate the vertex storage and keep it mapped; bufferFragment is one third of the buffer
// (presumably a three-section ring buffer — confirm). Storage for the other path types is not allocated here.
if(p->pathType == DrawPathType::FAST)
{
    glNamedBufferStorage(p->vertexBuffer, p->rbVertices.bufferSize, nullptr, m_persistentCreateFlags);
    p->rbVertices.ptr = (Vertex*)glMapNamedBufferRange(p->vertexBuffer, 0, p->rbVertices.bufferSize, m_persistentMapFlags);
    p->rbVertices.bufferFragment = p->rbVertices.bufferSize / 3;
}

// indices: the element buffer is attached to the VAO; no storage is allocated in this snippet
glCreateBuffers(1, &p->indexBuffer);
glVertexArrayElementBuffer(p->vertexArrayObject, p->indexBuffer);

// draw commands (disabled): would create a persistently mapped GL_DRAW_INDIRECT_BUFFER
//      glCreateBuffers(1, &p->drawCmdBuffer);
//      glBindBuffer(GL_DRAW_INDIRECT_BUFFER, p->drawCmdBuffer);
//      glNamedBufferStorage(p->drawCmdBuffer, p->rbCommands.bufferSize, nullptr, m_persistentCreateFlags);
//      p->rbCommands.ptr = (DEIC*)glMapNamedBufferRange(p->drawCmdBuffer, 0, p->rbCommands.bufferSize, m_persistentMapFlags);
//      p->rbCommands.bufferFragment = p->rbCommands.bufferSize / 3;

// unsure how this works (disabled): attempt to read baseInstance as attribute 2
//      glEnableVertexArrayAttrib(p->vertexArrayObject, 2);
//      glVertexAttribIFormat(2, 1, GL_UNSIGNED_INT, offsetof(DrawElementsIndirectCommand, baseInstance));
//      glVertexAttribBinding(2, bindingIndex);
//      glVertexBindingDivisor(bindingIndex, 1);

// draw IDs: attribute 2 is one unsigned int, tightly packed, advancing once per instance (divisor 1).
// The buffer is created and bound here but given no storage in this snippet.
glCreateBuffers(1, &p->drawIDBuffer);
glBindBuffer(GL_ARRAY_BUFFER, p->drawIDBuffer);
glEnableVertexAttribArray(2);
glVertexAttribIPointer(2, 1, GL_UNSIGNED_INT, sizeof(GLuint), 0);
glVertexAttribDivisor(2, 1);

// transforms: storage is allocated and kept mapped only for LONG and FAST paths,
// then exposed to shaders at shader-storage binding 0
glCreateBuffers(1, &p->transformBuffer);
if(p->pathType == DrawPathType::LONG || p->pathType == DrawPathType::FAST)
{
    glNamedBufferStorage(p->transformBuffer, p->rbTransforms.bufferSize, nullptr, m_persistentCreateFlags);
    p->rbTransforms.ptr = (glm::mat4*)glMapNamedBufferRange(p->transformBuffer, 0, p->rbTransforms.bufferSize, m_persistentMapFlags);
    p->rbTransforms.bufferFragment = p->rbTransforms.bufferSize / 3;
    glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, p->transformBuffer);
}

// texture addresses: exposed to shaders at shader-storage binding 1 (no storage allocated in this snippet)
glCreateBuffers(1, &p->textureAddressBuffer);
glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, p->textureAddressBuffer);

This is the useful part of the 'renderPrep' function.

// Walks every glyph and appends its indices, model matrix, vertices, draw command,
// texture address and draw ID to the draw path's CPU-side vectors.
// Glyphs without a defined texture address are logged, counted in `failed` and skipped.
// `offset`, `inc`, `failed`, `currentTex` and the target vectors are declared outside this snippet.
for(size_t i = 0; i < glyphs->size(); i++)
{
    auto it = glyphs->at(i);

    // ensure we have a valid texture address
    if(!it->textureAddress.defined())
    {
        Logger::getInstance().Log(Logs::CRIT, Logs::Drawing, "Renderer2D::drawPrep()", "Iteration [{}] of [{}] has a null texture address (0,0)!", i, glyphs->size());
        failed++;
    }
    else
    {
        // indices are made absolute: each base quad index is shifted by the number of vertices stored so far
        offset = verts->size();
        for(int in = 0; in < QUAD_VERTS; in++) { indices->push_back(baseQuadIndices[in] + offset); }

        // creating our model space to world space matrix ('model' in "projection * view * model")
        // NOTE(review): glm::translate post-multiplies, so the stored matrix is rotate * translate — confirm this order is intended
        glm::mat4 transRotate = glm::rotate(identMat, glm::radians(it->angle), glm::vec3(0.0f, 0.0f, 1.0f));
        transforms->push_back(transRotate);
        transforms->back() = glm::translate(transforms->back(), it->position);

        // push back modelspace vertices
        for(auto& v : it->vertices) { verts->push_back(v); }

        // update previous draw command or create a new one
        if(currentTex.exists() && currentTex == it->textureAddress)
        {
            // append previous draw command
            // NOTE(review): this grows BOTH the index count and the instance count, so every instance
            // re-draws the whole accumulated index range — looks consistent with the reported
            // "all objects in all locations" output; confirm
            DEICs->back().vertexCount += QUAD_VERTS;
            DEICs->back().instanceCount += 1; // one more instance per appended glyph
        }
        else
        {
            // different texture, new draw command
            // fields in order, as named by the log below: vertexCount, instanceCount, firstIndex, baseVertex, baseInstance
            // NOTE(review): the indices pushed above already include `offset`, and baseVertex adds
            // inc * BASE_VERTS on top — possibly a double vertex offset; confirm
            DEIC tmp = { QUAD_VERTS, 1, (inc * QUAD_VERTS), (inc * BASE_VERTS), inc };
            DEICs->push_back(tmp);
            currentTex = it->textureAddress;
        }

        /// \NOTE: Current issue is that the draw command is only drawing one object, in two iterations.
        ///     This is responsible for the blank second box
        /* DEIC tmp = { QUAD_VERTS, 1, (inc * QUAD_VERTS), (inc * BASE_VERTS), 0 };
        DEICs->push_back(tmp);
        texAddrs->push_back(it->textureAddress); */

        // dump the most recent draw command (new or updated) for debugging
        Logger::getInstance().Log(Logs::DEBUG, Logs::Drawing, "Renderer2D::drawPrep()",
            "\n\033[93mDEIC #{}\033[0m:\n\tvertCount\t\t{}\n\tinstCount\t\t{}\n\tfirstInd\t\t{}\n\tbaseVert\t\t{}\n\tbaseInst\t\t{}\n",
            DEICs->size(), DEICs->back().vertexCount, DEICs->back().instanceCount, DEICs->back().firstIndex, DEICs->back().baseVertex, DEICs->back().baseInstance);

        // one texture address and one draw ID are recorded per accepted glyph; `inc` counts accepted glyphs
        texAddrs->push_back(currentTex);
        p->drawIDs.push_back(inc);
        inc++;
    }
}

This is the snippet actually responsible for rendering.

// Issues the multi-draw for draw path `p`: selects a program, binds the VAO and SSBOs,
// draws, locks the ring-buffer ranges just used, then restores the previous program.
int activeProgramID = 0; // currently used glsl program
glGetIntegerv(GL_CURRENT_PROGRAM, &activeProgramID);

// active passed glsl program id, or enable existing if not already enabled
// (if glProgID is not positive and a program is already active, that program is left in use)
if(glProgID > 0) { glUseProgram(glProgID); }
else if(activeProgramID == 0) { glUseProgram(m_prog->getProgramID()); }

// all clear, do it!
glBindVertexArray(p->vertexArrayObject);

// bind SSBOs, if applicable (binding 0 = transforms, binding 1 = texture addresses)
if(p->transformBuffer) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, p->transformBuffer); }
if(p->textureAddressBuffer) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, p->textureAddressBuffer); }

// finally render
//if(p->drawCmdBuffer) { glBindBuffer(GL_DRAW_INDIRECT_BUFFER, p->drawCmdBuffer); glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, 0, p->drawCommands.size(), 0); }
//else { glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, p->drawCommands.data(), p->drawCommands.size(), 0); }
// NOTE(review): `indirect` is a client-memory pointer here; a core-profile context requires the
// commands to come from a buffer bound to GL_DRAW_INDIRECT_BUFFER — confirm the context profile.
// The final 0 is the stride, meaning the commands are tightly packed.
glMultiDrawElementsIndirect(GL_TRIANGLES, GL_UNSIGNED_INT, p->drawCommands.data(), p->drawCommands.size(), 0);

// update ring buffer(s), if applicable: only buffers with a non-null mapped pointer are locked
if(p->rbCommands.ptr != nullptr) { m_bufferLockManager->lockRange(p->rbCommands.oldHead, p->rbCommands.bufferFragment); }
if(p->rbTransforms.ptr != nullptr) { m_bufferLockManager->lockRange(p->rbTransforms.oldHead, p->rbTransforms.bufferFragment); }
if(p->rbVertices.ptr != nullptr) { m_bufferLockManager->lockRange(p->rbVertices.oldHead, p->rbVertices.bufferFragment); }

// options specific to a "fast" draw path (if a fast draw path, glyphs are single use)
if(p->pathType == DrawPathType::FAST) { p->clear(true); }

// clean up
glBindVertexArray(0);

// change to previous glProgram (or to no program if none was active on entry)
if(activeProgramID) { glUseProgram(activeProgramID); }
else { glUseProgram(0); }

EDIT #1, 2019-04-05 11:53a EST:

First, I forgot shaders! Apologies for missing this.

// --------- Vertex shader ------------
// Transforms each vertex by a per-object model matrix read from the SSBO at binding 0,
// then by the combined view-projection matrix, and forwards the UV and draw ID.
    // uniforms / shader_storage_buffer object
    layout(std140, binding = 0) buffer CB0 { mat4 Transforms[]; };

    // view & projection in one
    uniform mat4 ViewProjection;

    // input
    layout(location = 0) in vec3 In_v3Pos;
    layout(location = 1) in vec2 In_v2TexCoord;
    layout(location = 2) in uint In_uiDrawID; // set up on the C++ side as a per-instance attribute (divisor 1)

    // output
    out DrawBlock
    {
        vec2 v2TexCoord;
        flat uint iDrawID;
    } Out;

    void main()
    {
        // NOTE(review): a divisor-1 attribute already advances once per instance (starting at baseInstance),
        // so adding gl_InstanceID may offset the lookup twice for instances after the first — confirm
        mat4 World = Transforms[In_uiDrawID + gl_InstanceID];
        vec4 worldPos = World * vec4(In_v3Pos, 1.0);
        gl_Position = ViewProjection * worldPos;
        Out.v2TexCoord = In_v2TexCoord;
        // the fragment stage receives the raw draw ID, without gl_InstanceID added
        Out.iDrawID = In_uiDrawID;
    }

// --------- Fragment shader ------------
// Samples a texture-array slice selected by the interpolant draw ID and writes it out fully opaque.
// One texture address: an array-texture sampler plus the slice (layer) to read.
// NOTE(review): a sampler stored inside a buffer presumably relies on bindless textures — confirm the extension is enabled
struct TexAddress
    {
        sampler2DArray arr;
        float slice;
    };

    // texture addresses, indexed by draw ID (SSBO binding 1)
    layout (std430, binding = 1) buffer CB1 { TexAddress texAddress[]; };

    // input
    in DrawBlock
    {
        vec2 v2TexCoord;
        flat uint iDrawID;
    } In;

    // output
    layout(location = 0) out vec4 Out_v4Color;

    // samples `addr.arr` at `uv` in layer `addr.slice`
    vec4 Texture(TexAddress addr, vec2 uv) { return texture(addr.arr, vec3(uv, addr.slice)); }

    void main()
    {
        int DrawID = int(In.iDrawID);
        // keep the sampled RGB and force alpha to 1.0
        Out_v4Color = vec4(Texture(texAddress[DrawID], In.v2TexCoord).xyz, 1.0f);
    }

If I remove the non-DSA drawIDs block and replace it with the snippet below, it draws white triangles focused on the center of the screen.

// Alternative to the draw-ID buffer: keep the draw commands in a persistently mapped
// indirect buffer and try to read each command's baseInstance field as vertex attribute 2.
glCreateBuffers(1, &p->drawCmdBuffer);
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, p->drawCmdBuffer);
glNamedBufferStorage(p->drawCmdBuffer, p->rbCommands.bufferSize, nullptr, m_persistentCreateFlags);
p->rbCommands.ptr = (DEIC*)glMapNamedBufferRange(p->drawCmdBuffer, 0, p->rbCommands.bufferSize, m_persistentMapFlags);
// one third of the buffer per fragment (presumably a three-section ring buffer — confirm)
p->rbCommands.bufferFragment = p->rbCommands.bufferSize / 3;

// NOTE(review): attribute 2 is associated with `bindingIndex`, the same binding the setup code
// attaches the vertex buffer to, and the draw-command buffer is never attached as a vertex buffer
// in this snippet — so attribute 2 presumably reads vertex data, not baseInstance; confirm
glEnableVertexArrayAttrib(p->vertexArrayObject, 2);
glVertexAttribIFormat(2, 1, GL_UNSIGNED_INT, offsetof(DrawElementsIndirectCommand, baseInstance));
glVertexAttribBinding(2, bindingIndex);
// NOTE(review): the first argument of glVertexBindingDivisor is a binding index, not an attribute
// index; 2 is passed here whereas the commented-out variant in the setup code passes bindingIndex — confirm
glVertexBindingDivisor(2, 1);

Outcome: outcome using DEIC baseInst


EDIT #2 @ 2019-04-06 12:09p EST: Created a gist on github with the full header/source for the renderer. Link : https://gist.github.com/bbilyeu/bbf74ef4eaf979b5d2b4f2c2a9dcce48

Beau B.
  • 31
  • 7
  • 3
    Your code seems really confused as to what it wants to do. It's half-DSA style, half-separate attribute format, and half-VertexAttribPointer. You need to settle on one style of how to set up stuff and stick with it. It's really hard to tell where there may be mistakes when your code seems to be at war with itself. – Nicol Bolas Apr 05 '19 at 20:21
  • The attribute format was mostly me lazily debugging, but you make a valid point. I will be mindful of this in future questions. – Beau B. Apr 06 '19 at 02:10

1 Answer

4

The number of elements, instances and draw commands are all different, and refer to different things. In particular, using any multi-draw command does not necessarily mean that you do any instancing.

The following code uses one glMultiDrawArraysIndirect call to issue two drawing commands:

  1. Render three instances of the "quad" object
  2. Render five instances of the "triangle" object

Data

// Per-vertex data: a 2D position inside the unit square, used by the vertex shader
// as the interpolation factor between an instance's two corners.
struct Vert {
    float position[2];
};

// Per-instance data: where the shape is placed and which colour it gets.
struct Inst {
    float position[4]; // as two corners: (x0, y0, x1, y1)
    float color[4];    // RGBA
};

// Vertex data for both shapes in one array; the draw commands select a shape
// by its first-vertex offset (quad starts at 0, triangle at 4).
static const Vert verts[] = {
    // quad (4 vertices, ordered for GL_TRIANGLE_STRIP)
    { { 0, 0 } }, // vertex 0
    { { 1, 0 } },
    { { 0, 1 } },
    { { 1, 1 } },
    
    // triangle (3 vertices)
    { { 0, 0 } }, // vertex 4
    { { 1, 0 } },
    { { 0.5, 1 } },
};

// Instance data for both draw commands in one array; each command selects its
// slice through its first-instance offset (quads start at 0, triangles at 3).
// Each entry is { two corners (x0, y0, x1, y1) }, { RGBA colour }.
static const Inst insts[] = {
    // three quads
    { { -0.8, -0.8, -0.6, -0.6 }, { 1, 0, 0, 1 } }, // instance 0
    { { -0.1, -0.8, 0.1, -0.6 }, { 0, 1, 0, 1 } },
    { { 0.6, -0.8, 0.8, -0.6 }, { 0, 0, 1, 1 } },
    
    // five triangles
    { { -0.8, 0.6, -0.6, 0.8 }, { 1, 1, 0, 1 } }, // instance 3
    { { -0.4, 0.6, -0.2, 0.8 }, { 0.1, 0.1, 0.1, 1 } },
    { { -0.1, 0.6, 0.1, 0.8 }, { 0, 1, 1, 1 } },
    { { 0.2, 0.6, 0.4, 0.8 }, { 0.1, 0.1, 0.1, 1 } },
    { { 0.6, 0.6, 0.8, 0.8 }, { 1, 0, 1, 1 } },
};

// The two indirect draw commands consumed by a single multi-draw call.
// DrawArraysIndirectCommandSN is declared elsewhere; per the comments below its fields are,
// in order: vertex count, instance count, first vertex, first instance.
static const DrawArraysIndirectCommandSN cmds[] = {
    // quads: 4 vertices, 3 instances, first vertex=0, first instance=0
    { 4, 3, 0, 0 },

    // triangles: 3 vertices, 5 instances, first vertex=4, first instance=3
    { 3, 5, 4, 3 },
};

The initialization code:

// Immutable buffers, filled once at creation:
//   buf[0] per-vertex data, buf[1] per-instance data, buf[2] indirect draw commands.
GLuint buf[3];
glCreateBuffers(3, buf);
glNamedBufferStorage(buf[0], sizeof(verts), verts, 0);
glNamedBufferStorage(buf[1], sizeof(insts), insts, 0);
glNamedBufferStorage(buf[2], sizeof(cmds), cmds, 0);

// Vertex array configured entirely through DSA calls, grouped by buffer binding.
const GLuint perVertexBinding = 0;
const GLuint perInstanceBinding = 1;

GLuint va;
glCreateVertexArrays(1, &va);

// Binding 0 advances per vertex and feeds attribute 0 (position inside the unit square).
glVertexArrayVertexBuffer(va, perVertexBinding, buf[0], 0, sizeof(Vert));
glVertexArrayAttribFormat(va, 0, 2, GL_FLOAT, GL_FALSE, offsetof(Vert, position));
glVertexArrayAttribBinding(va, 0, perVertexBinding);
glEnableVertexArrayAttrib(va, 0);

// Binding 1 advances once per instance (divisor 1) and feeds
// attribute 1 (the two corners) and attribute 2 (the colour).
glVertexArrayVertexBuffer(va, perInstanceBinding, buf[1], 0, sizeof(Inst));
glVertexArrayBindingDivisor(va, perInstanceBinding, 1);

glVertexArrayAttribFormat(va, 1, 4, GL_FLOAT, GL_FALSE, offsetof(Inst, position));
glVertexArrayAttribBinding(va, 1, perInstanceBinding);
glEnableVertexArrayAttrib(va, 1);

glVertexArrayAttribFormat(va, 2, 4, GL_FLOAT, GL_FALSE, offsetof(Inst, color));
glVertexArrayAttribBinding(va, 2, perInstanceBinding);
glEnableVertexArrayAttrib(va, 2);

The rendering part:

// Per-frame rendering: clear, bind the program pipeline and vertex array, then issue
// every command in the indirect buffer with a single multi-draw call.
glClear(GL_COLOR_BUFFER_BIT);
glBindProgramPipeline(pp.get());
glBindVertexArray(va);
// the draw commands are sourced from the buffer bound to GL_DRAW_INDIRECT_BUFFER
glBindBuffer(GL_DRAW_INDIRECT_BUFFER, buf[2]);
// indirect = 0: byte offset into the bound buffer; drawcount = number of entries in cmds;
// stride = 0: commands are tightly packed
glMultiDrawArraysIndirect(GL_TRIANGLE_STRIP, 0, sizeof(cmds)/sizeof(cmds[0]), 0);

The vertex shader:

#version 450 core
// Places each shape's unit-square vertex inside the rectangle given by the
// per-instance corners and forwards the per-instance colour.
layout(location = 0) in vec2 position;       // per-vertex: position inside the unit square
layout(location = 1) in vec4 inst_position;  // per-instance: two corners, xy and zw
layout(location = 2) in vec4 color;          // per-instance: colour

// built-in output block redeclared explicitly
out gl_PerVertex {
    vec4 gl_Position;
};

layout(location = 0) out PerVertex {
    vec4 color;
} OUT;

void main() {
    OUT.color = color;
    // component-wise interpolation: position (0,0) maps to corner xy, (1,1) maps to corner zw
    gl_Position = vec4(mix(inst_position.xy, inst_position.zw, position), 0, 1);
}

The fragment shader:

#version 450 core
// Writes the colour received from the vertex stage unchanged.
layout(location = 0) in PerVertex {
    vec4 color;
} IN;
layout(location = 0) out vec4 OUT;
void main() {
    OUT = IN.color;
}

The result:

enter image description here

Yakov Galka
  • 70,775
  • 16
  • 139
  • 220
  • I updated my example at the top to use the baseInstance + gl_InstanceID to grab the transform. It partially works in that the first two vertices appear to be in the correct place...but that's all that is correct. This feels like I am overlooking or over-complicating something incredibly basic. – Beau B. Apr 06 '19 at 15:59
  • 1
    It's hard to understand your code. What do you try to achieve with `In_uiDrawID + gl_InstanceID`? Why do you mix multi-draw with instancing? Why do you use `DrawElements` where `DrawArrays` is sufficient? If you want to draw a bunch of textured rectangles then you can use plain instancing without any multi-draw. Also you don't need general 4x4 transforms for that. Pass `vec4`s through a VAO attribute. You don't need the shader buffer for textures either. Send your bindless textures as a VAO attribute too. Just start with the code I posted that works, understand it fully, and then play from it. – Yakov Galka Apr 06 '19 at 16:17