Problems converting YV12 to RGB through GLSL

Question

I'm trying to accomplish YV12 to RGB conversion mentioned in this post with GLSL shaders.

My application loads a raw YV12 frame from the disk and tries to perform the conversion using GLSL shaders. However, the resulting image is flipped vertically and has some color issues. I think the problem may be that the image is being read as an array of char (1 byte) and then converted to an array of GLushort (2 bytes). What do you think?

This is what the raw YUV frame looks like:

enter image description here

and the raw frame loaded by the application can be downloaded from here.

and this is the output I'm getting:

enter image description here

I'm sharing the source code of the application below:

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <GL/glew.h>
#include <GL/glut.h>
#include <GL/glu.h>

#include <iostream>

#include <fstream>

#ifndef SEEK_SET
#  define SEEK_SET 0
#endif

// Rotation angles (degrees, as taken by glRotatef) applied in Display();
// Xrot and Yrot are changed by the arrow keys in SpecialKey().
static GLfloat Xrot = 0, Yrot = 0, Zrot = 0;
// Frame dimensions; assigned in Init() and used as texture coordinates
// by DrawObject().
static GLint ImgWidth, ImgHeight;
// Pixel buffer that Init() allocates, uploads to the texture and frees.
static GLushort *ImageYUV = NULL;


static void DrawObject(void)
{
   glBegin(GL_QUADS);

   glTexCoord2f(0, 0);
   glVertex2f(-1.0, -1.0);

   glTexCoord2f(ImgWidth, 0);
   glVertex2f(1.0, -1.0);

   glTexCoord2f(ImgWidth, ImgHeight);
   glVertex2f(1.0, 1.0);

   glTexCoord2f(0, ImgHeight);
   glVertex2f(-1.0, 1.0);

   glEnd();
}

static void Display( void )
{
   glClear( GL_COLOR_BUFFER_BIT );

   glPushMatrix();
      glRotatef(Xrot, 1.0, 0.0, 0.0);
      glRotatef(Yrot, 0.0, 1.0, 0.0);
      glRotatef(Zrot, 0.0, 0.0, 1.0);
      DrawObject();
   glPopMatrix(); 

   glutSwapBuffers();
}

static void Reshape( int width, int height )
{
   glViewport( 0, 0, width, height );
   glMatrixMode( GL_PROJECTION );
   glLoadIdentity();

   // Vertical flip so texture appears right
   glFrustum( -1.0, 1.0, 1.0, -1.0, 10.0, 100.0 ); 
   //glFrustum( -1.0, 1.0, -1.0, 1.0, 10.0, 100.0 );

   glMatrixMode( GL_MODELVIEW );
   glLoadIdentity();
   glTranslatef( 0.0, 0.0, -15.0 );
}

/*
 * GLUT keyboard callback: Escape (ASCII 27) exits the program; any other key
 * just requests a redraw.  The mouse position (x, y) is unused.
 */
static void Key( unsigned char key, int x, int y )
{
   const unsigned char escapeKey = 27;

   (void) x;
   (void) y;

   if (key == escapeKey) {
      exit(0);
   }

   glutPostRedisplay();
}

/*
 * GLUT special-key callback: the arrow keys change the rotation by 3 per
 * press (up/down adjust Xrot, left/right adjust Yrot), then a redraw is
 * requested.  The mouse position (x, y) is unused.
 */
static void SpecialKey( int key, int x, int y )
{
   const float delta = 3.0;

   (void) x;
   (void) y;

   if (key == GLUT_KEY_UP) {
      Xrot += delta;
   }
   else if (key == GLUT_KEY_DOWN) {
      Xrot -= delta;
   }
   else if (key == GLUT_KEY_LEFT) {
      Yrot += delta;
   }
   else if (key == GLUT_KEY_RIGHT) {
      Yrot -= delta;
   }

   glutPostRedisplay();
}

// Reports the outcome of compiling a shader object.
//
// Prints the shader's info log to stderr when the reported log length is
// greater than 1, and returns true if GL_COMPILE_STATUS is GL_TRUE.
// Returns false if the buffer for the log cannot be allocated.
bool CheckShader(int n_shader_object)
{
    int n_status;
    glGetShaderiv(n_shader_object, GL_COMPILE_STATUS, &n_status);
    const bool b_ok = (n_status == GL_TRUE);

    int n_capacity;
    glGetShaderiv(n_shader_object, GL_INFO_LOG_LENGTH, &n_capacity);

    // nothing to print
    if(n_capacity <= 1)
        return b_ok;

    char *p_s_log = static_cast<char*>(malloc(n_capacity));
    if(!p_s_log)
        return false;

    int n_written;
    glGetShaderInfoLog(n_shader_object, n_capacity, &n_written, p_s_log);
    assert(n_written <= n_capacity);

    fprintf(stderr, "%s\n", p_s_log);
    free(p_s_log);

    return b_ok;
}

static void Init( int argc, char *argv[] )
{
   GLuint texObj = 100;
   const char *file;

   printf("Checking GL_ARB_texture_rectangle\n");
   if (!glutExtensionSupported("GL_ARB_texture_rectangle")) {
      printf("Sorry, GL_ARB_texture_rectangle is required\n");
      exit(0);
   }

   glPixelStorei(GL_UNPACK_ALIGNMENT, 1);

   glBindTexture(GL_TEXTURE_RECTANGLE_ARB, texObj);
#ifdef LINEAR_FILTER
   /* linear filtering looks much nicer but is much slower for Mesa */
   glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
   glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
#else
   glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   glTexParameteri(GL_TEXTURE_RECTANGLE_ARB, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
#endif

    std::ifstream yuv_file("data.yv12", std::ios::in | std::ios::binary | std::ios::ate);
    if (!yuv_file.is_open())
    {
        std::cout << "> GLWidget::GLWidget !!! Failed to load yuv file";
        return;
    }
    int yuv_file_sz = yuv_file.tellg();

    ImgWidth = 1280;
    ImgHeight = 720;
    ImageYUV = new GLushort[yuv_file_sz];

    char* memblock = new char[yuv_file_sz];
    if (!memblock)
    {
        std::cout << "> GLWidget::GLWidget !!! Failed to allocate memblock";
        return;
    }

    yuv_file.seekg(0, std::ios::beg);
    yuv_file.read(memblock, yuv_file_sz);
    yuv_file.close();

    // A simple "memcpy(ImageYUV, memblock, yuv_file_sz);" 
    // won't work because the data read is stored as char (1 byte) and GLushort is 2 bytes.
    // So, doing a manual copy:
    for (int i = 0; i < yuv_file_sz; i++)
    {
        ImageYUV[i] = (GLushort)memblock[i];
    }
    delete[] memblock;

   printf("Image: %dx%d\n", ImgWidth, ImgHeight);

   glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
                GL_LUMINANCE_ALPHA, ImgWidth, ImgHeight, 0,
                GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, ImageYUV);

   assert(glGetError() == GL_NO_ERROR);

   glTexSubImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
                   0, 0, ImgWidth, ImgHeight,
                   GL_LUMINANCE_ALPHA, GL_UNSIGNED_BYTE, ImageYUV);

   assert(glGetError() == GL_NO_ERROR);

   delete[] ImageYUV;

   glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE);

   glEnable(GL_TEXTURE_RECTANGLE_ARB);

   glShadeModel(GL_FLAT);
   glClearColor(0.3, 0.3, 0.4, 1.0);

    static const char *p_s_vertex_shader =
        "varying vec2 t;"
        "void main()"
        "{"
        "    t = gl_MultiTexCoord0.xy;"
        "    gl_Position = ftransform();"
        "}";
    static const char *p_s_fragment_shader =
        "#extension GL_ARB_texture_rectangle : enable\n"
        "varying vec2 t;"
        "uniform sampler2DRect tex;"
        "void main()"
        "{"
        "    vec2 tcEven = vec2(floor(t.x * .5) * 2.0, t.y);"
        "    vec2 tcOdd = vec2(tcEven.x + 1.0, t.y);"
        "    float Cb = texture2DRect(tex, tcEven).x - .5;"
        "    float Cr = texture2DRect(tex, tcOdd).x - .5;"
        "    float y = texture2DRect(tex, t).w;" // redundant texture read optimized away by texture cache
        "    float r = y + 1.28033 * Cr;"
        "    float g = y - .21482 * Cb - .38059 * Cr;"
        "    float b = y + 2.12798 * Cb;"
        "    gl_FragColor = vec4(r, g, b, 1.0);"
        "}";

    int v = glCreateShader(GL_VERTEX_SHADER);
    int f = glCreateShader(GL_FRAGMENT_SHADER);
    int p = glCreateProgram();
    glShaderSource(v, 1, &p_s_vertex_shader, 0);
    glShaderSource(f, 1, &p_s_fragment_shader, 0);
    glCompileShader(v);
    CheckShader(v);
    glCompileShader(f);
    CheckShader(f);
    glAttachShader(p, v);
    glAttachShader(p, f);
    glLinkProgram(p);
    glUseProgram(p);
    glUniform1i(glGetUniformLocation(p, "tex"), 0);

   if (argc > 1 && strcmp(argv[1], "-info")==0) {
      printf("GL_RENDERER   = %s\n", (char *) glGetString(GL_RENDERER));
      printf("GL_VERSION    = %s\n", (char *) glGetString(GL_VERSION));
      printf("GL_VENDOR     = %s\n", (char *) glGetString(GL_VENDOR));
      printf("GL_EXTENSIONS = %s\n", (char *) glGetString(GL_EXTENSIONS));
   }
}


int main( int argc, char *argv[] )
{
   glutInit( &argc, argv );
   glutInitWindowSize( 1280, 720 );
   glutInitWindowPosition( 0, 0 );
   glutInitDisplayMode( GLUT_RGB | GLUT_DOUBLE);
   glutCreateWindow(argv[0] );
   glewInit();

   Init( argc, argv );

   glutReshapeFunc( Reshape );
   glutKeyboardFunc( Key );
   glutSpecialFunc( SpecialKey );
   glutDisplayFunc( Display );

   glutMainLoop();
   return 0;
}

Face Bloke · Accepted Answer · 2012-01-24T07:57:21.443

The problem here is that the image is not in the interleaved layout the shader expects: the chrominance and luminance planes are not interleaved, but are laid out in separate blocks. This can be solved in two ways: either interleave the planes before loading them into the texture and use the rest of the code as is, or do the interleaving in the shader. I removed iostream and replaced it with stdio (I'm using a rather old compiler). Here is my code for loading the image and interleaving it:

// Whole contents of "data.yv12"; each failure path below prints a message
// to stderr and returns from the enclosing function with nothing left open
// or allocated.
GLubyte *memblock;
{
    FILE *p_fr = fopen("data.yv12", "rb");
    if(!p_fr) {
        fprintf(stderr, "!!! Failed to load yuv file\n");
        return;
    }
    fseek(p_fr, 0, SEEK_END);
    long yuv_file_sz = ftell(p_fr); // -1 on error
    fseek(p_fr, 0, SEEK_SET);
    if(yuv_file_sz <= 0) {
        fprintf(stderr, "!!! Failed to load yuv file\n");
        fclose(p_fr);
        return;
    }
    memblock = new GLubyte[yuv_file_sz];
    if(!memblock) {
        fprintf(stderr, "!!! Failed to allocate memblock\n");
        fclose(p_fr); // do not leak the file handle
        return;
    }
    if(fread(memblock, yuv_file_sz, 1, p_fr) != 1) {
        // short read: the buffer would be only partially filled
        fprintf(stderr, "!!! Failed to load yuv file\n");
        fclose(p_fr);
        delete[] memblock;
        return;
    }
    fclose(p_fr);
}
// load .raw file

ImgWidth = 1280;
ImgHeight = 720;
ImageYUV = new GLushort[ImgWidth * ImgHeight];
// allocate an image

int chromaWidth = ImgWidth / 2;
int chromaHeight = ImgHeight / 2; // 2x2 luminance subsampling
const GLubyte *pCb = memblock + ImgWidth * ImgHeight; // Cb block after Y
const GLubyte *pCr = pCb + chromaWidth * chromaHeight; // Cr block after Cb
// get pointers to smaller Cb and Cr blocks (the image is *not* interleaved)

for(int i = 0; i < ImgWidth * ImgHeight; ++ i) {
    int x = i % ImgWidth;
    int y = i / ImgWidth;
    GLubyte cb = pCb[(x / 2) + (y / 2) * chromaWidth];
    GLubyte cr = pCr[(x / 2) + (y / 2) * chromaWidth];
    ImageYUV[i] = (memblock[i] << 8) | ((x & 1)? cr : cb);
}
// convert (interleave) the data to YV12

This is pretty straightforward, and can be used with the shader above.

Now what if we wanted to skip the interleaving? First, I'm going to figure out how the addressing works here (we're going to treat the data as a slightly taller monochrome image, with the chrominance planes stored in the extra rows after the luminance plane):

// Same interleaving as before, but every sample is addressed as a pixel
// (column, row) of one ImgWidth-wide grayscale image that holds the Y block
// in its first ImgHeight rows and the Cb and Cr blocks in the rows after it.
for(int y = 0; y < ImgHeight; ++ y) {
    for(int x = 0; x < ImgWidth; ++ x) {
        // a chroma row is chromaWidth samples long, so one grayscale row holds
        // two of them: chroma row (y / 2) lies in grayscale row (y / 4) of its
        // block, in the left or right half depending on whether (y / 2) is odd
        int CbY = ImgHeight + (y / 4);
        // the Cb block occupies chromaHeight / 2 grayscale rows; Cr follows it
        int CrY = ImgHeight + chromaHeight / 2 + (y / 4);
        int CbCrX = (x / 2) + chromaWidth * ((y / 2) & 1);
        // calculate x, y of cr and cb pixels in the grayscale image
        // where the Y, Cb and Cr blocks are next to each other

        assert(&memblock[CbCrX + CbY * ImgWidth] == &pCb[(x / 2) + (y / 2) * chromaWidth]);
        assert(&memblock[CbCrX + CrY * ImgWidth] == &pCr[(x / 2) + (y / 2) * chromaWidth]);
        // make sure the addresses are correct (and they are)

        GLubyte cb = memblock[CbCrX + CbY * ImgWidth];
        GLubyte cr = memblock[CbCrX + CrY * ImgWidth];
        GLubyte Y = memblock[x + y * ImgWidth];

        // luma in the high byte; Cb on even columns, Cr on odd columns in the low byte
        ImageYUV[x + y * ImgWidth] = (Y << 8) | ((x & 1)? cr : cb);
    }
}
// interleave the data (a little bit different way, use physical layout in memory)

That has pretty much the same effect. Now we can take the code that calculates the positions and put it in the shader.

// Fragment shader that converts the planar frame to RGB without any CPU-side
// interleaving.  The texture is a single-channel image, ImgWidth wide, with
// the Y block in its first ImgHeight rows and the Cb and Cr blocks after it;
// the shader computes where each pixel's Cb and Cr samples live in it.
//
// Uniforms: ImgHeight (rows of the Y block), chromaHeight_Half (texture rows
// taken by the Cb block), chromaWidth (samples per chroma row).
//
// NOTE(review): the Cb block is assumed to come first; if the colors look
// wrong (e.g. yellowish), swap Cb and Cr.
static const char *p_s_fragment_shader =
    "#extension GL_ARB_texture_rectangle : enable\n"
    "uniform sampler2DRect tex;"
    "uniform float ImgHeight, chromaHeight_Half, chromaWidth;"
    "void main()"
    "{"
    "    vec2 t = gl_TexCoord[0].xy;" // get texcoord from fixed-function pipeline
    "    float CbY = ImgHeight + floor(t.y / 4.0);"
    "    float CrY = ImgHeight + chromaHeight_Half + floor(t.y / 4.0);"
    // the half of the texture row is chosen by the parity of the chroma row
    // (y / 2), not by the parity of the pixel row y
    "    float CbCrX = floor(t.x / 2.0) + chromaWidth * mod(floor(t.y / 2.0), 2.0);"
    "    float Cb = texture2DRect(tex, vec2(CbCrX, CbY)).x - .5;"
    "    float Cr = texture2DRect(tex, vec2(CbCrX, CrY)).x - .5;"
    "    float y = texture2DRect(tex, t).x;" // redundant texture read optimized away by texture cache
    "    float r = y + 1.28033 * Cr;"
    "    float g = y - .21482 * Cb - .38059 * Cr;"
    "    float b = y + 2.12798 * Cb;"
    "    gl_FragColor = vec4(r, g, b, 1.0);"
    "}";

By using this shader, we can upload the raw data directly to a texture, except that the texture is a little bit taller and has only one channel (GL_LUMINANCE):

// Upload the file buffer unchanged as one single-channel image whose height
// is ImgHeight + ImgHeight / 2, i.e. the frame plus the extra rows after it.
glTexImage2D(GL_TEXTURE_RECTANGLE_ARB, 0,
                GL_LUMINANCE, ImgWidth, ImgHeight + ImgHeight / 2, 0, // !! 1.5x the frame height
                GL_LUMINANCE, GL_UNSIGNED_BYTE, memblock); // !! one byte per texel, taken from memblock

I will leave it at that. Here are complete source codes:

interleaving in shader (faster, preferable)
manual interleaving in "C"

Sorry for the quick end, i will have problems if i don't leave my workplace ASAP :).

+1 For such a **high quality answer**. This should be up voted a lot! — karlphillip, Jan 23 '12 at 23:11
On Linux, with Both ATI and Intel graphic cards the [result is yellow-ish](http://i41.tinypic.com/2ynmrl0.jpg). Do you know what could be happening? — karlphillip, Jan 24 '12 at 11:30
By the way, our codes present the result flipped vertically (upside down). To fix that, simply change the `glFrustum()` call inside `Reshape()` to `glFrustum( -1.0, 1.0, 1.0, -1.0, 10.0, 100.0 );`. I fixed mine in the question. — karlphillip, Jan 24 '12 at 12:14
Swap Cr and Cb in the shader. That should do it. Oh, and sorry about the upside-down thing, i completely forgot about that, was finishing the answer in a rush. — Face Bloke, Jan 24 '12 at 13:33
Thanks, that did the trick! But it's a mystery why ATI/Intel on Linux needs this swap and NVIDIA on Windows doesn't. I'm starting to believe this might be more related to the OS (driver) than the video card itself. — karlphillip, Jan 24 '12 at 13:49
Yes, that is strange. And are you using the same input data? Or did you run GStreamer (or whatever) on each of the computers? Because right now, I can't think of anything that would make any sense. — Face Bloke, Jan 24 '12 at 14:21

Problems converting YV12 to RGB through GLSL

1 Answers1

Linked