2

I've created this GLSL Compute Shader and compiled it using "glslangValidator.exe". However, it will only ever update the "Particles[i].Velocity" values and not any other values and this only happens in some instances. I've checked that the correct input values are sent in using "RenderDoc".

Buffer Usage Flag Bits

VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT

And the Property Flag Bits

VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT

GLSL Shader

#version 450
#extension GL_ARB_separate_shader_objects : enable

// One simulated particle as stored in the Particles[] array.
// Under the std430 layout rules a vec3 has the same 16-byte base alignment as
// a vec4, so each vec3 member starts on a 16-byte boundary. The members end at
// byte 68 and the array stride is rounded up to 80 bytes. The host-side struct
// has to reproduce these offsets exactly.
struct Particle
{
  vec3 Position;   // offset  0: current position
  vec3 Velocity;   // offset 16: current velocity
  vec3 IPosition;  // offset 32: value Position is reset to once LifeTime runs out
  vec3 IVelocity;  // offset 48: value Velocity is reset to once LifeTime runs out

  float LifeTime;  // offset 60: remaining lifetime; shares IVelocity's 16-byte slot
  float ILifetime; // offset 64: value LifeTime is reset to once it runs out
};

// Particle storage, read and modified in place by main().
// std430 is what Vulkan GLSL already applies to buffer blocks by default; it is
// spelled out so the layout the host data must match is visible at a glance.
layout(std430, binding = 0) buffer Source
{
   Particle Particles[ ];
};

// Per-dispatch parameters. main() only reads `time`, which it uses as the
// integration step. std140 is the Vulkan GLSL default for uniform blocks,
// made explicit here for the same reason as above.
layout(std140, binding = 1) uniform UBO
{
  mat4 model;
  mat4 view;
  mat4 proj;
  float time;
};

// Simulation constants; never written, hence const.
const vec3 Gravity = vec3(0.0f,-0.98f,0.0f);
const float dampeningFactor = 0.5;

// Updates the particle selected by gl_GlobalInvocationID.x.
// A particle with LifeTime > 0 is integrated by one step of `time`: velocity
// first, then position using the updated velocity, then the lifetime is
// reduced. Any other particle is reset to its stored initial state.
void main(){
  uint i = gl_GlobalInvocationID.x;

  // Particles.length() is derived from the size of the bound buffer range, so
  // this skips invocations that would otherwise access memory past its end.
  if(i >= uint(Particles.length())){
    return;
  }

  if(Particles[i].LifeTime > 0.0f){
    Particles[i].Velocity = Particles[i].Velocity + Gravity * dampeningFactor * time;
    Particles[i].Position = Particles[i].Position + Particles[i].Velocity * time;
    Particles[i].LifeTime = Particles[i].LifeTime - time;
  }else{
    Particles[i].Velocity = Particles[i].IVelocity;
    Particles[i].Position = Particles[i].IPosition;
    Particles[i].LifeTime = Particles[i].ILifetime;
  }
}

Descriptor Set Layout Binding

        // Two descriptors, both visible to the compute stage only:
        // a storage buffer at binding 0 and a uniform buffer at binding 1.
        VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[2] = {
            {
                0,                                  // binding
                VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,  // descriptorType
                1,                                  // descriptorCount
                VK_SHADER_STAGE_COMPUTE_BIT,        // stageFlags
                nullptr                             // pImmutableSamplers
            },
            {
                1,                                  // binding
                VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,  // descriptorType
                1,                                  // descriptorCount
                VK_SHADER_STAGE_COMPUTE_BIT,        // stageFlags
                nullptr                             // pImmutableSamplers
            }
        };

The Command Dispatch

// Records a dispatch of MAX_PARTICLES x 1 x 1 workgroups into the compute command buffer.
// NOTE(review): the shader declares no local_size, so this presumably runs one
// invocation per workgroup, i.e. one per particle - confirm.
vkCmdDispatch(computeCommandBuffers, MAX_PARTICLES , 1, 1);

The Submitting of the Queue

            // Submit the recorded compute command buffer and block until the GPU
            // signals computeFence.
            VkSubmitInfo cSubmitInfo = {};
            cSubmitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;

            cSubmitInfo.commandBufferCount = 1;
            cSubmitInfo.pCommandBuffers = &computeCommandBuffers;

            if (vkQueueSubmit(computeQueue.getQueue(), 1, &cSubmitInfo, computeFence) != VK_SUCCESS) {
                throw std::runtime_error("failed to submit compute command buffer!");
            }

            // The wait itself can fail (e.g. device loss); do not carry on as if the
            // compute results were available in that case.
            // NOTE(review): computeFence has to be unsignaled when it is submitted;
            // no vkResetFences call is visible here - confirm it is reset elsewhere.
            if (vkWaitForFences(device.getDevice(), 1, &computeFence, VK_TRUE, UINT64_MAX) != VK_SUCCESS) {
                throw std::runtime_error("failed to wait for compute fence!");
            }

UPDATE: 13/05/2017 (More Information Added)

Particle Struct Definition in CPP

struct Particle {
    glm::vec3 location;
    glm::vec3 velocity;
    glm::vec3 initLocation;
    glm::vec3 initVelocity;

    float lifeTime;
    float initLifetime;
}

Data Mapping to Storage Buffer

            void* data;
            vkMapMemory(device.getDevice(), stagingBufferMemory, 0, bufferSize, 0, &data);
            memcpy(data, particles, (size_t)bufferSize);
            vkUnmapMemory(device.getDevice(), stagingBufferMemory);

            copyBuffer(stagingBuffer, computeBuffer, bufferSize);

Copy Buffer Function (by Alexander Overvoorde from vulkan-tutorial.com)

        // Copies `size` bytes from the start of srcBuffer to the start of dstBuffer
        // using a one-time command buffer submitted to the graphics queue, and
        // blocks until that queue is idle again.
        //
        // Throws std::runtime_error if the command buffer cannot be allocated or if
        // recording, submitting or waiting fails. The temporary command buffer is
        // freed in either case.
        void copyBuffer(VkBuffer srcBuffer, VkBuffer dstBuffer, VkDeviceSize size) {
            VkCommandBufferAllocateInfo allocInfo = {};
            allocInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
            allocInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
            allocInfo.commandPool = commandPool.getCommandPool();
            allocInfo.commandBufferCount = 1;

            VkCommandBuffer commandBuffer = VK_NULL_HANDLE;
            if (vkAllocateCommandBuffers(device.getDevice(), &allocInfo, &commandBuffer) != VK_SUCCESS) {
                throw std::runtime_error("failed to allocate copy command buffer!");
            }

            VkCommandBufferBeginInfo beginInfo = {};
            beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
            beginInfo.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

            // Each later step runs only if the previous one succeeded, so the
            // first failure is the one reported after cleanup.
            VkResult result = vkBeginCommandBuffer(commandBuffer, &beginInfo);

            if (result == VK_SUCCESS) {
                VkBufferCopy copyRegion = {};
                copyRegion.size = size;
                vkCmdCopyBuffer(commandBuffer, srcBuffer, dstBuffer, 1, &copyRegion);

                result = vkEndCommandBuffer(commandBuffer);
            }

            if (result == VK_SUCCESS) {
                VkSubmitInfo submitInfo = {};
                submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
                submitInfo.commandBufferCount = 1;
                submitInfo.pCommandBuffers = &commandBuffer;

                result = vkQueueSubmit(graphicsQueue.getQueue(), 1, &submitInfo, VK_NULL_HANDLE);
            }

            if (result == VK_SUCCESS) {
                result = vkQueueWaitIdle(graphicsQueue.getQueue());
            }

            // Release the temporary command buffer before reporting any failure.
            vkFreeCommandBuffers(device.getDevice(), commandPool.getCommandPool(), 1, &commandBuffer);

            if (result != VK_SUCCESS) {
                throw std::runtime_error("failed to copy buffer!");
            }
        }
  • How do You know members are not updated? Do You align storage buffer members in memory? – Ekzuzy May 12 '18 at 20:36
  • The Storage Buffer is used as vertex input for the vertex shader, and I can see that only some of the particles move. Furthermore, I've checked and compared the same values from different frames using the aforementioned Graphics Debugger (RenderDoc). – Hasan Al-Baghdadi May 13 '18 at 00:27
  • And for the second question, sorry, I don't know what aligning memory means. Is it the same as allocating memory and binding the object to it? – Hasan Al-Baghdadi May 13 '18 at 00:29
  • Storage and uniform buffer members must be placed at appropriate offsets (from the beginning of the buffer's memory) which depend on the member's data type (or more specifically its size). For example, if a member's size is N bytes (a float has 4), then it must be placed at a memory offset that is a multiple of N. BUT if a member is a vector with 3 elements, it must be placed at an offset that is a multiple of 4N. So Your Position member may start at offset 0. But Velocity cannot be placed right after Position; it must be placed at a 16-byte offset (128 bits = 4 * 32 bits). – Ekzuzy May 13 '18 at 05:32
  • There are several rules for these offsets. They depend on the basic type, number of members (four vectors), if a member is a struct or array. There are slight differences between rules for uniform and storage buffer members. Check specification or Intel's tutorial: software.intel.com/en-us/articles/api-without-secrets-introduction-to-vulkan-part-7 – Ekzuzy May 13 '18 at 05:35
  • I think I added all the information you asked for under the update. All the data I'm sending is in the Particle Struct and as far as I'm aware no offset is required for that, please tell me if I'm wrong. – Hasan Al-Baghdadi May 13 '18 at 14:35
  • 1
    @HasanAl-Baghdadi: [Please stop using 3-element vectors in UBOs/SSBOs. It never works the way you think it does](https://stackoverflow.com/q/38172696/734069) – Nicol Bolas May 14 '18 at 14:35
  • @HasanAl-Baghdadi Now we agreed with Nicol Bolas on the specification's wording and You can read the corrected answer - sorry for the confusion. Align Your structure members to a multiple of 16, similarly its vec3 members need to be aligned to a multiple of 16 too. – Ekzuzy May 14 '18 at 17:08

1 Answer

2

Have a look at this StackOverflow question:

Memory allocation with std430 qualifier

FINAL, CORRECTED ANSWER:

In Your case the biggest member of Your structure is vec3 (3-element vector of floats). Base alignment of vec3 is the same as alignment of vec4. So the base alignment of Your array's elements is equal to 16 bytes. This means that each element of Your array has to start at an address that is a multiple of 16.

But alignment rules have to be applied to each structure member recursively. 3-element vectors have the same alignment as 4-element vectors. This means that:

  • Position member starts at the same alignment as each array member
  • Velocity, IPosition and IVelocity members must start at multiples of 16 bytes after the beginning of a given array element.
  • LifeTime and ILifeTime members have a 4-bytes alignment.

So the total size of Your struct in bytes is equal to:

  • Position - 16 bytes (Position itself takes 12 bytes, but next member has a 16-byte alignment)
  • Velocity - 16 bytes
  • IPosition - 16 bytes
  • IVelocity + LifeTime - 16 bytes
  • ILifeTime - 4 bytes

which gives 68 bytes. So, as far as I understand it, You need a 12-byte padding at the end of Your structure (additional 12 bytes between array elements) because each array element must start at addresses which are a multiple of 16.

So the first array element starts at offset 0 of the memory bound to the storage buffer. But the second array element must start at offset 80 from the beginning of the memory (the nearest multiple of 16 greater than 68) and so on.

Or, as @NicolBolas commented, to make life easier, pack everything in vec4 members only ;-).

BETTER THOUGH NOT FULLY CORRECT ANSWER:

In Your case the biggest member of Your structure is vec3 (3-element vector of floats). So the base alignment of Your array's elements is equal to 12 bytes (in case of arrays of structs in std430 layout, the base alignment doesn't have to be rounded up to match the alignment of 4-element vectors. <- Here I was wrong. We don't have to round up the structure's base alignment, but the alignment of its members is calculated normally, with vec3 alignment being the same as vec4 alignment). This means that each element of Your array has to start at an address that is a multiple of 12 (no, in this case it should start at a multiple of 16).

But alignment rules have to be applied to each structure member recursively. 3-element vectors have the same alignment as 4-element vectors. This means that:

  • Position member starts at the same alignment as each array member
  • Velocity, IPosition and IVelocity members must start at multiples of 16 bytes after the beginning of a given array element.
  • LifeTime and ILifeTime members have a 4-bytes alignment.

So the total size of Your struct in bytes is equal to:

  • Position - 16 bytes (Position itself takes 12 bytes, but next member has a 16-byte alignment)
  • Velocity - 16 bytes
  • IPosition - 16 bytes
  • IVelocity + LifeTime - 16 bytes
  • ILifeTime - 4 bytes

which gives 68 bytes. So, as far as I understand it, You need a 4-byte padding at the end of Your structure (additional 4 bytes between array elements) because each array element must start at addresses which are a multiple of 12 (again, we need 12-byte padding here so the next array elements starts at a multiple of 16, not 12).

So the first array element starts at offset 0 of the memory bound to the storage buffer. But the second array element must start at offset 72 from the beginning of the memory (the nearest multiple of 12 greater than 68) and so on.

PREVIOUS, WRONG ANSWER:

In Your case the biggest member is vec3 (3-element vector of floats). Its alignment is equal to 12 bytes (in case of arrays of structs we don't have to round the alignment of 3-element vectors to match the alignment of 4-element vectors). The size of Your struct in bytes equals 56. So, as far as I understand it, You need a 4-byte padding at the end of Your structure (additional 4 bytes between array elements) because each array element must start at addresses which are a multiple of 12.

Ekzuzy
  • 3,193
  • 1
  • 16
  • 14
  • Even in std430, the base alignment of a `vec3` is *always* 16 bytes. std430 only matters for structs/arrays with 2 element vectors. It changes nothing about `vec3`'s. – Nicol Bolas May 14 '18 at 14:34
  • @NicolBolas From std430 layout rules: *Structure alignment is the same as the alignment for the biggest structure member, where three-component vectors are not rounded up to the size of four-component vectors. Each structure will start on this alignment, and its size will be the space needed by its members, according to the previous rules, rounded up to a multiple of the structure alignment.* – Ekzuzy May 14 '18 at 16:07
  • @NicolBolas And from the OpenGL specification: *When using the std430 storage layout, shader storage blocks will be laid out in buffer storage identically to uniform and shader storage blocks using the std140 layout, except that the base alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are not rounded up a multiple of the base alignment of a vec4.* – Ekzuzy May 14 '18 at 16:10
  • "*From std430 layout rules*" I cannot find anything in OpenGL 4.6 or GLSL 4.60 to corroborate those statements. I have found this in the standard: "If the member is a structure, the base alignment of the structure is N, where N is the largest base alignment value of any of its members, and rounded up to the base alignment of a vec4." There is no exception here for "three-component vectors are not rounded up to the size of four-component vectors". – Nicol Bolas May 14 '18 at 16:22
  • And note that rule 3 is the one that says that 3-element vectors have the alignment of 4-element vectors, not rules 4 or 9. So that std430 rule *only* applies to the rounding specified in rules 4 or 9, not the rounding specified in rule 3. – Nicol Bolas May 14 '18 at 16:24
  • Yes. This rule applies only to the base alignment of the array's elements. But each structure member has the same, general alignment rules. So while the alignment of the whole structure/array's elements doesn't have to be rounded up to match a 4-element vector's alignment, the alignment of its members still has to. I've corrected my answer. – Ekzuzy May 14 '18 at 16:31
  • "*So the base alignment of Your array's elements is equal to 12 bytes*" That would mean that the base alignment of a struct was *smaller* than the alignment requirement of its members. Meaning that the `vec3`s inside of it could be *misaligned*. That kind of removes the point of alignment. As stated in rule 9 of the specification, a structure has the maximum alignment of its members. `vec3` has an alignment of 16, so a struct containing a `vec3` must have an alignment no less than 16. – Nicol Bolas May 14 '18 at 16:31
  • @NicolBolas This is taken directly from the core OpenGL 4.6 spec: *Rule 9: If the member is a structure, the base alignment of the structure is N, where N is the largest base alignment value of any of its members, and rounded up to the base alignment of a vec4 The individual members of this substructure are then assigned offsets by applying this set of rules recursively, where the base offset of the first member of the sub-structure is equal to the aligned offset of the structure.* – Ekzuzy May 14 '18 at 16:37
  • @NicolBolas But under these rules, in the same core OpenGL 4.6 spec, there is a note: *When using the std430 storage layout, shader storage blocks will be laid out in buffer storage identically to uniform and shader storage blocks using the std140 layout, except that the base alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are not rounded up a multiple of the base alignment of a vec4.* – Ekzuzy May 14 '18 at 16:38
  • "*except that the base alignment and stride of arrays of scalars and vectors in rule 4 and of structures in rule 9 are not rounded up a multiple of the base alignment of a vec4*" Note that this does not mention rule 3. As such, `vec3`'s base alignment is ***still 16***. Therefore, any struct which contains a `vec3` will have a base alignment of 16, in accord with rule 9. – Nicol Bolas May 14 '18 at 16:39
  • Let us [continue this discussion in chat](https://chat.stackoverflow.com/rooms/171020/discussion-between-nicol-bolas-and-ekzuzy). – Nicol Bolas May 14 '18 at 16:40