You should assign the result of your reduction to a variable, or it will be lost.
You should also be sure to include the proper thrust headers for the functions you are using.
Reviewing the documentation for thrust::reduce
we see that the reduction output type takes it's type from the types specified for the input iterators (ie. the input types must be convertible to the output type). Since your functor is mixing uchar4
and int
types, thrust doesn't know how to convert.
There are probably a number of ways to do what you want. It seems evident that you are just wanting to sum every pixel component (R,G,B) of every pixel together.
Since we want all types to match during the reduction (input and output), one approach would be to use transform_reduce
and convert the input types to int
quantities, before doing the sum reduction (then yielding the desired int
result).
This code shows the modifications, and compiles cleanly for me:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
struct transform_functor
{
__host__ __device__
int operator()(uchar4 data) const
{
return (int)data.x + (int)data.y + (int)data.z;
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
int pix_sum = thrust::transform_reduce(tptr, tptr+(DIM*DIM), transform_functor(), int(0), thrust::plus<int>());
printf("sum = %d\n", pix_sum);
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}
The reduce functor you wrote simply sums everything together (R,G,B components of all pixels). If that is what you want, fine. I think you could compute something like "average pixel intensity" with that. If you want the "average color", then you probably want to average the individual components separately. For this, we would probably use transform_reduce
, and the transform functor would convert the input uchar4
into a 3-tuple of int
, and the reduce functor would sum the individual components of the 3-tuples together, producing a 3-tuple result of the sum of the R,G, and B components, one in each int
of the 3-tuple. Here's a modified version of the above code that does this:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda_gl_interop.h>
#include <GL/glext.h>
#include <GL/glx.h>
#include <thrust/device_ptr.h>
#include <thrust/transform_reduce.h>
#include <thrust/functional.h>
#include <thrust/tuple.h>
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )
static void HandleError( cudaError_t err, const char *file, int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ), file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
PFNGLBINDBUFFERARBPROC glBindBuffer = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers = NULL;
PFNGLGENBUFFERSARBPROC glGenBuffers = NULL;
PFNGLBUFFERDATAARBPROC glBufferData = NULL;
#define DIM 512
GLuint bufferObj;
cudaGraphicsResource *resource;
typedef thrust::tuple<int, int, int> tpl3;
struct transform_functor
{
__host__ __device__
tpl3 operator()(uchar4 data) const
{
tpl3 result;
result.get<0>() = (int)data.x;
result.get<1>() = (int)data.y;
result.get<2>() = (int)data.z;
return result;
}
};
struct reduce_functor
{
__host__ __device__
tpl3 operator()(tpl3 left, tpl3 right) const
{
tpl3 result;
result.get<0>() = left.get<0>() + right.get<0>();
result.get<1>() = left.get<1>() + right.get<1>();
result.get<2>() = left.get<2>() + right.get<2>();
return result;
}
};
// create a green/black pattern
__global__ void kernel( uchar4 *ptr ) {
// map from threadIdx/BlockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
// now calculate the value at that position
float fx = x/(float)DIM - 0.5f;
float fy = y/(float)DIM - 0.5f;
unsigned char green = 128 + 127 * sin( abs(fx*100) - abs(fy*100) );
// accessing uchar4 vs unsigned char*
ptr[offset].x = 0;
ptr[offset].y = green;
ptr[offset].z = 0;
ptr[offset].w = 255;
}
static void draw_func( void ) {
glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, 0 );
glutSwapBuffers();
}
static void sort_pixels(){
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
thrust::device_ptr<uchar4> tptr = thrust::device_pointer_cast(devPtr);
tpl3 my_init;
my_init.get<0>() = 0;
my_init.get<1>() = 0;
my_init.get<2>() = 0;
tpl3 pix_sum = thrust::transform_reduce(tptr, tptr+(DIM*DIM), transform_functor(), my_init, reduce_functor());
printf("avg red = %f\n", (float)(pix_sum.get<0>())/(float)(DIM*DIM));
printf("avg grn = %f\n", (float)(pix_sum.get<1>())/(float)(DIM*DIM));
printf("avg blu = %f\n", (float)(pix_sum.get<2>())/(float)(DIM*DIM));
cudaGraphicsUnmapResources( 1, &resource, NULL );
draw_func();
}
static void key_func( unsigned char key, int x, int y ) {
switch (key) {
case 27:
HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
glDeleteBuffers( 1, &bufferObj );
exit(0);
break;
case 32:
sort_pixels();
break;
default:
break;
}
}
int main(int argc, char *argv[]) {
cudaGLSetGLDevice( 0 );
glutInit( &argc, argv );
glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
glutInitWindowSize( DIM, DIM );
glutCreateWindow( "sort test" );
glBindBuffer = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
glGenBuffers = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
glBufferData = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");
glGenBuffers( 1, &bufferObj );
glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4, NULL, GL_DYNAMIC_DRAW_ARB );
cudaGraphicsGLRegisterBuffer( &resource, bufferObj, cudaGraphicsMapFlagsNone );
cudaGraphicsMapResources( 1, &resource, NULL );
uchar4* devPtr;
size_t size;
cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, &size, resource);
dim3 grid(DIM/16,DIM/16);
dim3 threads(16,16);
kernel<<<grid,threads>>>( devPtr );
cudaGraphicsUnmapResources( 1, &resource, NULL );
// set up GLUT and kick off main loop
glutKeyboardFunc( key_func );
glutDisplayFunc( draw_func );
glutMainLoop();
}