-1

I rendered a scene with OpenGL (I can also render it to a texture).

I want to use CUDA / Thrust to sort this rendered image

How do I make the texture I registered with cudaGraphicsGLRegisterImage usable from Thrust?

Maybe something like this: "How to calculate an average from an int2 array using Thrust"?

Community
  • 1
  • 1
toejam
  • 1
  • 2
  • Could you be please more specific? My interpretation is the following. You have a function `f(xn, ym)` stored as a texture, where `(xn, ym)` are the pixel coordinates. You want to order the values of `f` and the pixel coordinates `(xn, ym)` accordingly? The post you have linked to does not appear to perform any sorting, but just calculates the "center of gravity" of a set of points. – Vitality Jul 26 '14 at 06:20
  • 2
    If you want help, provide a complete code that shows what you have produced so far (i.e. shows your creation of the image in OpenGL, render to texture, and display). It should be possible then to explain how to start at that point and make the pixel data available to thrust. By the way, there are quite a few [sample codes](http://docs.nvidia.com/cuda/cuda-samples/index.html#graphics) demonstrating various kinds of CUDA/OpenGL interop. – Robert Crovella Jul 26 '14 at 16:00

1 Answer

4

I'm not sure it makes sense to try and use textures directly with thrust. However using an ordinary GL pixel buffer can be made to work directly with thrust.

The following example creates an openGL pixel buffer with a particular green/black pattern, and then displays it. When you press the space bar, the pixel buffer will be made available to CUDA via CUDA/OpenGL interop, and then a thrust sort (in-place) function is called. After the sort, the sorted pixel data is displayed again. The pixel data never leaves the GPU.

Here's the sample code:

#include <stdio.h> 
#include <stdlib.h> 
#include <string.h> 
#include <GL/gl.h> 
#include <GL/glut.h> 
#include <cuda_gl_interop.h> 
#include <GL/glext.h> 
#include <GL/glx.h> 
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str ) 

// Terminate the program with a diagnostic if a CUDA runtime call failed.
//   err  - status code returned by the CUDA call under test
//   file - source file of the call site (HANDLE_ERROR passes __FILE__)
//   line - source line of the call site (HANDLE_ERROR passes __LINE__)
// Returns normally only when err is cudaSuccess; otherwise prints the CUDA
// error string plus the call site and exits with EXIT_FAILURE.
static void HandleError( cudaError_t err, const char *file,  int line ) { 
    if (err == cudaSuccess) {
        return;
    }

    const char *message = cudaGetErrorString( err );
    printf( "%s in %s at line %d\n", message, file, line );
    exit( EXIT_FAILURE );
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ )) 



// OpenGL buffer-object entry points. They start out NULL and are assigned in
// main() through GET_PROC_ADDRESS before any of them is called.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL; 
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL; 
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL; 
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL; 

// Edge length, in pixels, used for the window, the pixel buffer and the
// pattern kernel (the image is DIM x DIM).
#define     DIM    512 

// OpenGL name of the pixel buffer object generated in main().
GLuint  bufferObj; 
// CUDA graphics-interop handle that main() registers for bufferObj; it is
// mapped/unmapped around every CUDA access and unregistered on ESC.
cudaGraphicsResource *resource; 

// Ordering predicate for thrust::sort over RGBA pixels: a pixel sorts before
// another when its green component (the .y field of uchar4) is smaller.
// The other three components are ignored by the comparison.
struct sort_functor
{
  __host__ __device__
    bool operator()(uchar4 lhs, uchar4 rhs) const
    {
      const unsigned char lhs_green = lhs.y;
      const unsigned char rhs_green = rhs.y;
      return lhs_green < rhs_green;
    }
};



// create a green/black pattern
// Fill a DIM x DIM RGBA image with a green/black pattern.
//
// Launch layout: a 2D grid of 2D blocks with one thread per pixel; x indexes
// columns and y indexes rows. The grid may overshoot the image: threads whose
// (x, y) falls outside DIM x DIM return without writing. No shared memory is
// used.
//
//   ptr - device-accessible buffer of at least DIM*DIM uchar4 pixels, stored
//         row-major with a row pitch of exactly DIM pixels.
__global__ void kernel( uchar4 *ptr ) { 
  // map from threadIdx/blockIdx to pixel position
  const int x = threadIdx.x + blockIdx.x * blockDim.x; 
  const int y = threadIdx.y + blockIdx.y * blockDim.y; 

  // Guard the grid tail so a launch that does not tile DIM exactly can never
  // write past the end of the DIM*DIM buffer.
  if (x >= DIM || y >= DIM) {
    return;
  }

  // The row pitch is the image width, independent of the launch geometry.
  const int offset = x + y * DIM; 

  // now calculate the value at that position (coordinates centred on 0)
  const float fx = x/(float)DIM - 0.5f; 
  const float fy = y/(float)DIM - 0.5f; 
  // Explicit single-precision math; the result lies in [1, 255].
  const unsigned char green = 128 + 127 * sinf( fabsf(fx*100) - fabsf(fy*100) ); 

  // One 4-byte store per pixel instead of four separate byte stores:
  // red = 0, green = pattern, blue = 0, alpha = 255.
  ptr[offset] = make_uchar4( 0, green, 0, 255 ); 
} 

// GLUT display callback: draw the DIM x DIM RGBA image and present it.
// The pixel source passed to glDrawPixels is 0; main() leaves bufferObj bound
// to GL_PIXEL_UNPACK_BUFFER_ARB, so the data is taken from that buffer object
// rather than from host memory.
static void draw_func( void ) { 
  const GLsizei width  = DIM;
  const GLsizei height = DIM;
  const GLvoid *pixels = 0;

  glDrawPixels( width, height, GL_RGBA, GL_UNSIGNED_BYTE, pixels ); 
  glutSwapBuffers(); 
}
// Sort the pixels of the shared GL pixel buffer in place on the GPU, ordered
// by sort_functor, then redraw.
//
// The buffer is mapped into CUDA's address space for the duration of the sort
// and unmapped again before OpenGL draws from it. Every CUDA interop call is
// checked with HANDLE_ERROR, which prints a diagnostic and exits on failure.
static void sort_pixels(){
  HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) ); 

  uchar4* devPtr = NULL; 
  size_t  size   = 0; 
  HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource) ); 

  // Refuse to sort if the mapping is smaller than the image we are about to
  // touch; sorting DIM*DIM elements would otherwise run past the buffer.
  const size_t pixelCount = (size_t)DIM * DIM;
  if (devPtr == NULL || size < pixelCount * sizeof(uchar4)) {
    fprintf( stderr, "mapped buffer too small: %lu bytes, need %lu\n",
             (unsigned long)size, (unsigned long)(pixelCount * sizeof(uchar4)) );
    HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) ); 
    return;
  }

  // Wrap the raw device pointer so thrust dispatches the sort to the GPU.
  thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
  thrust::sort(tptr, tptr + pixelCount, sort_functor());

  // Hand the buffer back to OpenGL before drawing from it.
  HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) ); 
  draw_func();
}

// GLUT keyboard callback.
//   key  - ASCII code of the pressed key
//   x, y - mouse position reported by GLUT (not used)
// ESC (27) unregisters the CUDA resource, unbinds and deletes the GL buffer
// and exits the process; SPACE (32) sorts the pixels and redraws; every other
// key is ignored.
static void key_func( unsigned char key, int x, int y ) { 
  (void)x;
  (void)y;

  const unsigned char escapeKey = 27;
  const unsigned char spaceKey  = 32;

  if (key == escapeKey) {
    HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) ); 
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 ); 
    glDeleteBuffers( 1, &bufferObj ); 
    exit(0); 
  }

  if (key == spaceKey) {
    sort_pixels();
  }
} 



// Program entry point.
//
// Creates a DIM x DIM GLUT window, allocates an OpenGL pixel buffer object,
// registers it with CUDA, fills it with the pattern produced by kernel(), and
// then enters the GLUT main loop (SPACE sorts the pixels, ESC quits).
// Every CUDA runtime call is checked with HANDLE_ERROR, which prints a
// diagnostic and exits on failure.
int main(int argc, char *argv[]) { 

  HANDLE_ERROR( cudaGLSetGLDevice( 0 ) ); 

  glutInit( &argc, argv ); 
  glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA ); 
  glutInitWindowSize( DIM, DIM ); 
  glutCreateWindow( "sort test" ); 

  // Resolve the buffer-object entry points at run time.
  glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer"); 
  glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers"); 
  glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers"); 
  glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData"); 

  // Calling through a NULL entry point would crash with no explanation.
  if (glBindBuffer == NULL || glDeleteBuffers == NULL ||
      glGenBuffers == NULL || glBufferData == NULL) {
    fprintf( stderr, "required OpenGL buffer-object functions are not available\n" );
    return EXIT_FAILURE;
  }

  // Allocate DIM*DIM RGBA pixels (4 bytes each) and leave the buffer bound as
  // the pixel-unpack source; draw_func() relies on that binding.
  glGenBuffers( 1, &bufferObj ); 
  glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj ); 
  glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB ); 

  // Registration takes cudaGraphicsRegisterFlags*, not cudaGraphicsMapFlags*.
  HANDLE_ERROR( cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsRegisterFlagsNone ) ); 

  // Map the buffer into CUDA's address space to generate the initial image.
  HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) ); 
  uchar4* devPtr = NULL; 
  size_t  size   = 0; 
  HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource) ); 

  // One thread per pixel in 16x16 blocks; DIM is a multiple of 16, so the
  // grid covers the image exactly.
  dim3    grid(DIM/16,DIM/16); 
  dim3    threads(16,16); 
  kernel<<<grid,threads>>>( devPtr ); 
  // A kernel launch returns no status itself; fetch launch errors explicitly.
  HANDLE_ERROR( cudaGetLastError() ); 

  // Hand the buffer back to OpenGL before anything draws from it.
  HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) ); 

  // set up GLUT and kick off main loop 
  glutKeyboardFunc( key_func ); 
  glutDisplayFunc( draw_func ); 
  glutMainLoop(); 

  return 0;
} 

Compile like this:

nvcc -arch=sm_20 -o ogltest ogltest.cu -lglut

Here's what the display window looks like before sorting:

display before sorting

Here's what the display window looks like after sorting (after you press the space bar):

display after sorting

Note that we are sorting pixels based on the green component in this example.

You can press the ESC key to exit the app.

There are some updated versions of this sample code here.

Community
  • 1
  • 1
Robert Crovella
  • 143,785
  • 11
  • 213
  • 257
  • holy crap awesome answer. – toejam Jul 28 '14 at 17:16
  • Can you also do a reduce? I had trouble formulating it with : error : no suitable constructor exists to convert from "int" to "uchar4" when I tried a thrust::reduce(tptr, tptr + (WIDTH*HEIGHT)); – toejam Jul 28 '14 at 17:58
  • Yes, add the following two lines after the call to `thrust::sort`: `thrust::device_ptr<unsigned char> cptr = thrust::device_pointer_cast(reinterpret_cast<unsigned char *>(devPtr)); printf("sum is %u\n", thrust::reduce(cptr, cptr + 10));` and add the appropriate `#include <thrust/reduce.h>`. Note this is summing each of the pixel components in the `uchar4` (RGBA) pixel together, and putting the result in a `char` quantity. My guess is you don't want either of those behaviors. But other approaches can work too. If you have a new question, ask a new question, or this one will turn into a mess for future readers. – Robert Crovella Jul 28 '14 at 18:06
  • OK, I followed your suggestion and made the [following detailed post](http://stackoverflow.com/questions/25002241/using-thrusts-reduce-operator-with-pixel-uchar4-data-error) with an example. – toejam Jul 28 '14 at 18:56