I'm trying to implement a multipass compute shader for image processing. Each pass has an input image and an output image, and the input image of each pass is the output image of the previous pass.
This is my first time using compute shaders in OpenGL, so there may be some problems with my setup. I'm using OpenCV's Mat as the container for read/copy operations.
Some parts of the code aren't related to the problem, so I didn't include them; these include loading the image and initializing the context.
Initialization:
//texture init
glGenTextures(1, &feedbackTexture_);
glActiveTexture(GL_TEXTURE0);
glBindTexture(GL_TEXTURE_2D, feedbackTexture_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glBindTexture(GL_TEXTURE_2D, 0);
glGenTextures(1, &resultTexture_);
glActiveTexture(GL_TEXTURE0+1);
glBindTexture(GL_TEXTURE_2D, resultTexture_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glBindTexture(GL_TEXTURE_2D, 0);
// shader init
computeShaderID = glCreateShader(GL_COMPUTE_SHADER);
glShaderSource(computeShaderID, 1, &computeShaderSourcePtr, &computeShaderLength);
glCompileShader(computeShaderID);
programID = glCreateProgram();
glAttachShader(programID, computeShaderID);
glLinkProgram(programID);
glDeleteShader(computeShaderID);
Shader Code:
// Compute shader: inverts the colour channels of every texel (alpha is kept).
#version 430
// Each work group consists of a single invocation.
layout (local_size_x = 1, local_size_y = 1) in;
// Source image, image unit 0.
layout (location = 0, binding = 0, /*format*/ rgba32f) uniform readonly image2D inImage;
// Destination image, image unit 1.
layout (location = 1, binding = 1, /*format*/ rgba32f) uniform writeonly image2D resultImage;
// Declared but not referenced by main().
uniform writeonly image2D image;
void main()
{
    // The global invocation index is used directly as the texel coordinate.
    ivec2 coord = ivec2(gl_GlobalInvocationID.xy);
    // Fetch the source texel.
    vec4 source = imageLoad(inImage, coord);
    // Invert red, green and blue; pass alpha through unchanged.
    vec4 inverted = vec4(vec3(1.0) - source.rgb, source.a);
    imageStore(resultImage, coord, inverted);
}
Usage:
cv::Mat image = loadImage().clone();
cv::Mat result(image.rows,image.cols,image.type());
// These get the appropriate enums used by glTexImage2D
GLenum internalformat = GLUtils::getMatOpenGLImageFormat(image);
GLenum format = GLUtils::getMatOpenGLFormat(image);
GLenum type = GLUtils::getMatOpenGLType(image);
int dispatchX = 1;
int dispatchY = 1;
for ( int i = 0; i < shaderPasses_.size(); ++i)
{
// Update textures
glBindTexture(GL_TEXTURE_2D, feedbackTexture_);
glTexImage2D(GL_TEXTURE_2D, 0, internalformat, result.cols, result.rows, 0, format, type, result.data);
glBindTexture(GL_TEXTURE_2D, resultTexture_);
glTexImage2D(GL_TEXTURE_2D, 0, internalformat, image.cols, image.rows, 0, format, type, 0);
glBindTexture(GL_TEXTURE_2D, 0);
glClear(GL_COLOR_BUFFER_BIT);
std::shared_ptr<Shader> shaderPtr = shaderPasses_[i];
// Enable shader
shaderPtr->enable();
{
// Bind textures
// location = 0, binding = 0
glUniform1i(0,0);
// binding = 0
glBindImageTexture(0, feedbackTexture_, 0, GL_FALSE, 0, GL_READ_ONLY, internalformat);
// location = 1, binding = 1
glUniform1i(1,1);
// binding = 1
glBindImageTexture(1, resultTexture_, 0, GL_FALSE, 0, GL_WRITE_ONLY, internalformat);
// Dispatch rendering
glDispatchCompute((GLuint)image.cols/dispatchX,(GLuint)image.rows/dispatchY,1);
// Barrier will synchronize
glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
}
// disable shader
shaderPtr->disable();
// Here result is now the result of the last pass.
}
Sometimes I get strange results (glitchy textures, partially rendered textures), and the first pixel (at 0,0) is sometimes not written. Did I set everything up correctly, or am I missing something? This texture-based approach also seems really slow; is there an alternative that would improve performance?
Edit1: Changed memorybarrier flag.