0

I am trying to re-structure my code for MJPEG decoding so that I can implement it in a heterogeneous fashion. I started by finding code segments which I thought could benefit from parallel processing. I got the single threaded CPU code from this link.

The following code works properly. Please note that I omitted some portions of the code to represent it better here.

//Original MCU blocks
MCU_main AllMCUs[nb_MCU_X * nb_MCU_Y];

int32_t MCU[64];


//All RGB blocks
RGB_struct AllRGBs[nb_MCU_X * nb_MCU_Y];

//Counter for the main MCUs
uint32_t x;

x=0;

///////--------Unpack-blocks for 3 color components

                    //process 8x8 blocks horizontally
                    for (index_X = 0; index_X < nb_MCU_X; index_X++) {
                        //process 8x8 blocks vertically
                        for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {

                                AllMCUs[x].block_id=x;

                                component_index = component_order[0];

                                AllMCUs[x].MCU= MCU;

                            unpack_block(movie, &scan_desc,0, AllMCUs[x].MCU);


                            for (int i=0; i < 64; i++)
                         {
                             AllMCUs[x].MCU_Y[i]= AllMCUs[x].MCU[i];
                          }


                        unpack_block(movie, &scan_desc,1, AllMCUs[x].MCU);


                        for (int i=0; i < 64; i++)
                        {
                            AllMCUs[x].MCU_Cb[i]= AllMCUs[x].MCU[i];
                        }

                        unpack_block(movie, &scan_desc,2, AllMCUs[x].MCU);

                        for (int i=0; i < 64; i++)
                    {
                        AllMCUs[x].MCU_Cr[i]= AllMCUs[x].MCU[i];
                    }
                        x+= 1;

                        }
                    }


///////-----------IQZZ, IDCT, Upsampling and Color-conversion---------///////

uint32_t y=0;
x=0;

for (index_X = 0; index_X < nb_MCU_X; index_X++) {
   for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {

            if(AllMCUs[x].block_id == y){
        //iqzz, idct and upsampling for Y       
        iqzz_block();
        idct();
        upsampler();

        //iqzz, idct and upsampling for Cb
        iqzz_block();
        idct();
        upsampler();

        //iqzz, idct and upsampling for Cr
        iqzz_block();
        idct();
        upsampler();

        AllRGBs[x].block_id= x;
        //color-conversion
        if(color && (SOF_section.n > 1)){
         YCbCr_to_ARGB(YCbCr_MCU, &AllRGBs[x].RGB_MCU, max_ss_h, max_ss_v);
        }
        else{
            to_NB(YCbCr_MCU, RGB_MCU, max_ss_h, max_ss_v);
        }

screen_cpyrect(index_Y * MCU_sy * max_ss_h, index_X * MCU_sx * max_ss_v, MCU_sy * max_ss_h, MCU_sx * max_ss_v, &AllRGBs[x].RGB_MCU);


x+=1;
y+=1;
            }
        }
    }

The correct output looks like this:

enter image description here

My structure for RGB_struct is as follows:

typedef struct{
uint32_t block_id;
uint32_t *RGB_MCU
}RGB_struct;

My structure for MCU_main (not immediately relevant though) is this:

typedef struct{
uint32_t block_id;
int32_t *MCU_original;
int32_t MCU_Cb[64], MCU_Cr[64], MCU_Y[64];
}MCU_main;

screen_cpyrect copies every 8x8 block on the screen according to values of the offset. The code for it is as follows (same as the code from this link):

void screen_cpyrect(uint32_t x, uint32_t y, uint32_t w, uint32_t h, void *ptr)
{
    void *dest_ptr;
    void *src_ptr;
    uint32_t line;
    uint32_t w_internal = w, h_internal = h;
    int w_length;

    SDL_LockSurface(screen);
    w_length = screen->pitch / (screen->format->BitsPerPixel / 8 );

#ifdef DEBUG
    if ((y) > screen->h) {
        printf("[%s] : block can't be copied, "
                "not in the screen (too low)\n", __func__);
        exit(1);
    }
    if ((x) > screen->w) {
        printf("[%s] : block can't be copied, "
                "not in the screen (right border)\n", __func__);
        exit(1);
    }
#endif
    if ((x+w) > screen->w) {
        w_internal = screen->w -x;
    }
    if ((y+h) > screen->h) {
        h_internal = screen->h -y;
    }
    for(line = 0; line < h_internal ; line++)
    {
        // Positionning src and dest pointers
        //  _ src : must be placed at the beginning of line "line"
        //  _ dest : must be placed at the beginning
        //          of the corresponding block :
        //(line offset + current line + position on the current line)
        // We assume that RGB is 4 bytes

        dest_ptr = (void*)((uint32_t *)(screen->pixels) +
                ((y+line)*w_length) + x);
        src_ptr = (void*)((uint32_t *)ptr + ((line * w)));
        memcpy(dest_ptr,src_ptr,w_internal * sizeof(uint32_t));
    }

    SDL_UnlockSurface(screen);
}

I want the function screen_cpyrect to remain on the CPU. However, in my working code, it is connected to the rest of the JPEG functions. In order to make my code more parallel, I sought to separate it from the other JPEG functions.

The following code does NOT produce the correct output:

///////--------Unpack-blocks for 3 color components

//process 8x8 blocks horizontally
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
    //process 8x8 blocks vertically
    for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {

                        //For Y
                        unpack_block(movie, &scan_desc,0, AllMCUs[x].MCU);

                        //For Cb
                        unpack_block(movie, &scan_desc,1, AllMCUs[x].MCU);

                        //For Cr
                        unpack_block(movie, &scan_desc,2, AllMCUs[x].MCU);

                        x+= 1;

    }

}

///////-----------IQZZ, IDCT, Upsampling and Color-conversion---------///////

uint32_t y=0;
x=0;

for (index_X = 0; index_X < nb_MCU_X; index_X++) {
   for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {

            if(AllMCUs[x].block_id == y){
        //iqzz, idct and upsampling for Y       
        iqzz_block();
        idct();
        upsampler();

        //iqzz, idct and upsampling for Cb
        iqzz_block();
        idct();
        upsampler();

        //iqzz, idct and upsampling for Cr
        iqzz_block();
        idct();
        upsampler();

        AllRGBs[x].block_id= x;
        //color-conversion
        if(color && (SOF_section.n > 1)){
         YCbCr_to_ARGB(YCbCr_MCU, &AllRGBs[x].RGB_MCU, max_ss_h, max_ss_v);
        }
        else{
            to_NB(YCbCr_MCU, RGB_MCU, max_ss_h, max_ss_v);
        }

x+=1;
y+=1;
            }
        }
    }

//COPYING ALL THE PROCESSED BLOCKS TO THE SCREEN SEPARATELY 
x=0;
for (index_X = 0; index_X < nb_MCU_X; index_X++) {
    for (index_Y = 0; index_Y < nb_MCU_Y; index_Y++) {
        screen_cpyrect(index_Y * MCU_sy * max_ss_h, index_X * MCU_sx * max_ss_v, MCU_sy * max_ss_h, MCU_sx * max_ss_v, &AllRGBs[x].RGB_MCU);
        x+=1;
}
}

if (screen_refresh() == 1) 
{
    end_of_file = 1;
}

I get a completely distorted output. The frames are processed but they are not displayed correctly. The incorrect output looks like this:

enter image description here

The code for color conversion is as follows (copied from this link):

void YCbCr_to_ARGB(uint8_t *YCbCr_MCU[3], uint32_t *RGB_MCU, uint32_t nb_MCU_H, uint32_t nb_MCU_V)
{

    uint8_t *MCU_Y, *MCU_Cb, *MCU_Cr;
    int R, G, B;
    uint32_t ARGB;
    uint8_t index, i, j;

    MCU_Y = YCbCr_MCU[0];
    MCU_Cb = YCbCr_MCU[1];
    MCU_Cr = YCbCr_MCU[2];
    for (i = 0; i < 8 * nb_MCU_V; i++) {
        for (j = 0; j < 8 * nb_MCU_H; j++) {
            index = i * (8 * nb_MCU_H)  + j;
            R = (MCU_Cr[index] - 128) * 1.402f + MCU_Y[index];
            B = (MCU_Cb[index] - 128) * 1.7772f + MCU_Y[index];
            G = MCU_Y[index] - (MCU_Cb[index] - 128) * 0.34414f -
                (MCU_Cr[index] - 128) * 0.71414f;
            /* Saturate */
            if (R > 255)
                R = 255;
            if (R < 0)
                R = 0;
            if (G > 255)
                G = 255;
            if (G < 0)
                G = 0;
            if (B > 255)
                B = 255;
            if (B < 0)
                B = 0;
            ARGB = ((R & 0xFF) << 16) | ((G & 0xFF) << 8) | (B & 0xFF);
            // ARGB = 0xFF << 8;
            RGB_MCU[(i * (8 * nb_MCU_H) + j)] = ARGB;
        }
    }
}

I do not understand why my modified code for decoding frames should not work correctly. I tried to debug it in various ways and I could find no glaring problems in the alternate implementation. All the values of the AllRGBs array are being stored properly. I printed out the values and they are identical.

RGB before cpy_rect:0x7fff8f868bf8 RGB before cpy_rect:0x7fff8f868c08 RGB before cpy_rect:0x7fff8f868c18 RGB before cpy_rect:0x7fff8f868c28.......


RGB after cpy_rect:0x7fff8f868bf8 RGB after cpy_rect:0x7fff8f868c08 RGB after cpy_rect:0x7fff8f868c18 RGB after cpy_rect:0x7fff8f868c28  

Do you have any thoughts on why one version of the code is giving the correct output and not the other?

EDIT:

In order to test my code, I wrote the code to display each 8x8 block as soon as it is copied to the screen. I noticed that in my correct code, the blocks are copied correctly. However, in my incorrect code the 8x8 blocks are not displayed correctly (as you can see in the photos I attached).

Why should this happen even though the RGB values in both my programs are the same? How is it that when I copy all the RGB blocks on the screen in one go I get the incorrect output?

EDIT:

After testing the code in every way I can think of, I am starting to think this issue is arising because of the void pointer.

One of the input parameters of screen_cpyrect is a void pointer:

extern void screen_cpyrect(uint32_t x, uint32_t y, uint32_t w, uint32_t h, void *ptr)

RGB_MCU is being used as a function argument in the place of ptr. However, ptr is a pointer to everything whereas RGB_MCU is a pointer to the type uint32_t as shown in this declaration:

typedef struct{
uint32_t block_id;
uint32_t *RGB_MCU
}RGB_struct;

I read in the comment for this answer that when a void pointer is used incorrectly it can cause the program to give the incorrect output or simply crash.

How can I confirm that the problem I am experiencing is due to the void pointer?

a_sid
  • 577
  • 10
  • 28
  • 2
    Your example is hard to understand, you need to read this https://stackoverflow.com/help/mcve. With that out of the way doing a diff on your 2 main code paths I can see screen_cpyrect does not get called when the block_id is not equal to y. while in your refactored version it gets called everytime. – aram May 14 '18 at 05:31
  • @Aram I set y and x both to zero and they both get incremented together. So x and y are equal to one another throughout the duration of the program. – a_sid May 14 '18 at 15:41
  • My code is the same as the code in [this](https://github.com/claf/MJPEG/tree/master/tima_seq_version/src) link. I have only shown the parts that I changed to direct attention towards my actual problem. – a_sid May 14 '18 at 15:44
  • @Aram Is there anything that is particularly unclear about the code I have posted? As I said, I only posted the code that I changed for the sake of clarity and conciseness. – a_sid May 14 '18 at 15:47
  • Yes x and y are equal but block_id is equal to y? I'm talking about this line ' if(AllMCUs[x].block_id == y) { '. – aram May 14 '18 at 16:17
  • @Aram I wrote `AllMCUs[x].block_id=x;` when I am unpacking the 8x8 blocks. I modified the code I posted to depict this. – a_sid May 14 '18 at 16:48
  • Should there be a specific amount of delay prior to copying every block on the screen? – a_sid May 14 '18 at 18:27
  • Could this problem be occurring because `&AllRGBs[x].RGB_MCU` is being passed to `screen_cpyrect` as a void pointer? – a_sid May 15 '18 at 16:59
  • `uint32_t *RGB_MCU` is already a pointer. So I think it should not be forwarded by reference. – JHBonarius Jun 21 '18 at 06:13

0 Answers0