My other post was intended to collect general information on the kinds of GLSL spinlocks, but unfortunately nothing has come of it, and it has not solved my problem. Therefore, here is a specific question. I have reduced my problem to a minimal example, presented below:
The trivial program creates a screen-sized texture of locks and a screen-sized texture of colors. In pass one, the colors are all set to zero (shader 1). In pass two, two triangles are drawn, which the geometry shader quadruples and slightly offsets (shader 2). The fragment shader is meant to atomically increment the color texture's red channel under a per-pixel spinlock. In pass three, the color is visualized (shader 3).
Shader 1:
//Vertex
#version 440

// Transform applied to every vertex (presumably the projection matrix; set by the application).
uniform mat4 mat_P;

// Per-vertex position attribute.
in vec4 _vec_vert_a;

// Emits the position obtained by transforming the incoming vertex with mat_P.
void main() {
    vec4 projected = mat_P * _vec_vert_a;
    gl_Position = projected;
}
//Fragment
#version 440

// Color image reset by this pass through image load/store.
layout(rgba32f) coherent uniform image2D img0;

// Stores (0, 0, 0, 1) into the img0 texel under this fragment, then discards
// the fragment so nothing is written to the framebuffer.
void main() {
    ivec2 texel = ivec2(gl_FragCoord.xy);
    vec4 cleared = vec4(0.0, 0.0, 0.0, 1.0);
    imageStore(img0, texel, cleared);
    discard;
}
Shader 2:
//Vertex
#version 440

// Per-vertex position attribute.
in vec4 _vec_vert_a;

// Untransformed position forwarded to the geometry stage.
out vec4 vert_vg;

// Pass-through stage: gl_Position is not written here; the raw attribute is
// handed on unchanged and transformed later in the geometry shader.
void main() {
    vert_vg = _vec_vert_a;
}
//Geometry
#version 440

// Number of copies emitted for every input triangle.
#define REPS 4

layout(triangles) in;
layout(triangle_strip, max_vertices = 3*REPS) out;

// Transform applied to each emitted vertex.
uniform mat4 mat_P;

// Untransformed triangle corners received from the vertex stage.
in vec4 vert_vg[3];

// Emits REPS copies of the input triangle. Copy k is shifted by 5*k along
// both x and y before being transformed by mat_P, so successive copies are
// slightly offset from one another.
void main() {
    for (int copy_idx = 0; copy_idx < REPS; ++copy_idx) {
        vec2 shift = vec2(5.0 * copy_idx);
        for (int corner = 0; corner < 3; ++corner) {
            vec4 moved = vert_vg[corner];
            moved.xy += shift;
            gl_Position = mat_P * moved;
            EmitVertex();
        }
        // Close the strip so each copy is an independent triangle.
        EndPrimitive();
    }
}
//Fragment
#version 440
// Color image whose red channel is incremented by this pass.
layout(rgba32f) coherent uniform image2D img0;
// Per-pixel lock image: the code below treats 1u as "locked" and 0 as "unlocked".
// NOTE(review): none of the shaders shown here clears img1; this assumes the
// application initializes every texel to 0 before pass two -- confirm.
layout(r32ui) coherent uniform uimage2D img1;
// Spins on the lock texel for this fragment's pixel and, once the lock is
// acquired, performs a load / add / store on the matching img0 texel.
void main(void) {
ivec2 coord = ivec2(gl_FragCoord.xy);
bool have_written = false;
do {
// Atomically write 1u and inspect the previous value: anything other than 1u
// means the lock was free and this invocation now holds it.
bool can_write = (imageAtomicExchange(img1,coord,1u)!=1u);
if (can_write) {
// Critical section: non-atomic read-modify-write of the color texel.
vec4 data = imageLoad(img0,coord);
data.xyz += vec3(1.0,0.0,0.0);
imageStore(img0,coord,data);
// Placed between the color store and the lock release, presumably so the
// store is visible to other invocations before they can take the lock.
memoryBarrier();
// Release the lock by writing 0 back.
imageAtomicExchange(img1,coord,0);
have_written = true;
}
// Retry until this invocation has completed its one increment.
} while (!have_written);
// NOTE(review): the fragment is discarded after the image writes above; how
// discard interacts with image load/store in this shader should be confirmed
// against the GLSL specification.
discard;
}
Shader 3:
//Vertex
#version 440

// Transform applied to every vertex (presumably the projection matrix; set by the application).
uniform mat4 mat_P;

// Per-vertex position attribute.
in vec4 _vec_vert_a;

// Writes the mat_P-transformed vertex as the output position for the
// visualization pass.
void main() {
    vec4 transformed = mat_P * _vec_vert_a;
    gl_Position = transformed;
}
//Fragment
#version 440

// Color image accumulated by the earlier passes; only read here.
layout(rgba32f) coherent uniform image2D img0;

// Explicit output for draw buffer 0. gl_FragData is only declared in the
// compatibility profile, and "#version 440" without a profile selects core,
// so a user-defined output is used instead.
layout(location = 0) out vec4 frag_color;

// Reads the accumulated color for this pixel and writes a scaled copy to the
// framebuffer.
void main(void) {
    vec4 data = imageLoad(img0, ivec2(gl_FragCoord.xy));
    // Tonemap: divide by 4.0 (presumably matching the four overlapping copies
    // emitted in pass two) so the largest expected count maps to 1.0.
    frag_color = vec4(data.rgb / 4.0, 1.0);
}
Main Loop:
1. Enable Shader 1
2. Render fullscreen quad
3. glMemoryBarrier(GL_ALL_BARRIER_BITS);
4. Enable Shader 2
5. Render two small triangles
6. glMemoryBarrier(GL_ALL_BARRIER_BITS);
7. Enable Shader 3
8. Render fullscreen quad
Note that in steps 3 and 6 I think I could have used GL_SHADER_IMAGE_ACCESS_BARRIER_BIT instead; I am using GL_ALL_BARRIER_BITS just to be conservative.
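The visualized colors jitter with time, and are mostly fairly small. If the increments were truly atomic, every pixel would receive the same count each frame and the image would be stable, so this shows that atomicity is not being achieved. Can someone sanity check this procedure? Am I missing anything?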
EDIT: From this page, I found that using `discard` can make image load/store undefined in the fragment. I removed the discards, but the problem still occurs. I also found `layout(early_fragment_tests) in;`, which forces early fragment tests (it didn't help either).