Segmentation fault while running cuda programs with OpenGL

Question

I am working with the CUDA code available at http://code.google.com/p/snp-gpgpu/source/browse/trunk/cuda_by_example_codes/chapter08/basic_interop.cu?r=4

I replaced the original header includes with explicit ones, as follows:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda.h>
#include <cuda_runtime.h>
#include "cutil.h"
#include <cuda_gl_interop.h>

I compile the code with the command `nvcc -o test_cuda basic_prog_num_1.cu -lGL -lGLU -lglut`. It compiles without any errors and the `test_cuda` executable is created, but when I run it with `./test_cuda` I get a segmentation fault. One thing I should mention: when I first compiled with "cutil.h" included, I got an error saying that there is no such file or directory as cutil.h. So I downloaded "cutil.h" myself and placed it in the same folder as the program. The program now compiles without any errors, but running it still ends in a segmentation fault.

The code is given below :

// OpenGL buffer-object entry points, held as function pointers.
// All four start out as NULL.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;

// Width and height, in pixels, of the window and of the generated image.
#define     DIM    512

// OpenGL buffer object name, and the CUDA graphics resource registered for it.
GLuint  bufferObj;
cudaGraphicsResource *resource;

// Writes one RGBA pixel per thread into ptr.
// Launched from main() as a 2D grid of 2D blocks; the row length used for
// addressing is the total launch width (blockDim.x * gridDim.x).
__global__ void kernel( uchar4 *ptr ) {
    // global pixel coordinates of this thread
    const int col = blockIdx.x * blockDim.x + threadIdx.x;
    const int row = blockIdx.y * blockDim.y + threadIdx.y;

    // linear index of the pixel inside the launch-wide image
    const int rowPitch = blockDim.x * gridDim.x;
    const int pixel    = col + row * rowPitch;

    // coordinates recentred around the middle of the DIM x DIM image
    const float cx = col/(float)DIM - 0.5f;
    const float cy = row/(float)DIM - 0.5f;

    // green intensity follows a sine of the difference of the scaled |cx|, |cy|
    const float phase = abs(cx*100) - abs(cy*100);
    const unsigned char green = 128 + 127 * sin( phase );

    // store the whole pixel at once: R = 0, G = green, B = 0, A = 255
    ptr[pixel] = make_uchar4( 0, green, 0, 255 );
}

// GLUT keyboard callback: on Escape, release the interop resources and quit.
// x and y (the pointer position supplied by GLUT) are not used.
static void key_func( unsigned char key, int x, int y ) {
    const unsigned char kEscapeKey = 27;   // ASCII code of the Escape key

    // every other key is ignored
    if (key != kEscapeKey)
        return;

    // unregister from CUDA first, then unbind and delete the GL buffer
    HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
    glDeleteBuffers( 1, &bufferObj );
    exit(0);
}

// GLUT display callback: draws the DIM x DIM RGBA image and presents it.
static void draw_func( void ) {
    // The data argument is 0; main() leaves bufferObj bound to
    // GL_PIXEL_UNPACK_BUFFER_ARB, so the pixels are expected to come from
    // the start of that buffer rather than from client memory.
    const GLvoid *sourceOffset = 0;

    glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, sourceOffset );
    glutSwapBuffers();
}


// Program entry point: selects a CUDA device, creates a GLUT window, creates
// an OpenGL pixel buffer, lets a CUDA kernel fill it once, and then enters
// the GLUT main loop.
int main( int argc, char **argv ) {
    cudaDeviceProp  prop;
    int dev;

    // choose a device from the requested properties (only major = 1 and
    // minor = 0 are set; every other field is zeroed)
    memset( &prop, 0, sizeof( cudaDeviceProp ) );
    prop.major = 1;
    prop.minor = 0;
    HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );

    // tell the CUDA runtime that this device will be used for OpenGL interop
    // (the return code is not checked here)
    cudaGLSetGLDevice( dev );


    // create a double-buffered RGBA window of DIM x DIM pixels
    glutInit( &argc, argv );
    // NOTE(review): the stray text "@harrism : " splits the GLUT_DOUBLE token below
    glutInitDisplayMode( GLUT_DOU@harrism : BLE | GLUT_RGBA );
    glutInitWindowSize( DIM, DIM );
    glutCreateWindow( "bitmap" );

    // NOTE(review): the four lookups below are commented out, so glBindBuffer,
    // glDeleteBuffers, glGenBuffers and glBufferData keep the NULL value they
    // were initialised with at file scope.
    /*glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");*/


    // create a pixel-unpack buffer of DIM * DIM * 4 bytes with no initial data;
    // these three calls go through the function pointers mentioned above
    glGenBuffers( 1, &bufferObj );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
    glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4,
                  NULL, GL_DYNAMIC_DRAW_ARB );


        // register the GL buffer with CUDA (return code not checked)
        // NOTE(review): the stray text "@harrism : " splits the function name below
        cudaGraphicsGLRegisterBuf@harrism : fer( &resource, 
                                      bufferObj, 
                                      cudaGraphicsMapFlagsNone );


   // map the resource and fetch the device pointer to its storage
   cudaGraphicsMapResources( 1, &resource, NULL );
    uchar4* devPtr;
    size_t  size;

        cudaGraphicsResourceGetMappedPointer( (void**)&devPtr, 
                                              &size, 
                                              resource);

    // one thread per pixel: (DIM/16) x (DIM/16) blocks of 16 x 16 threads
    dim3    grids(DIM/16,DIM/16);
    dim3    threads(16,16);
    kernel<<<grids,threads>>>( devPtr );
  // unmap the resource once the kernel has been launched
  cudaGraphicsUnmapResources( 1, &resource, NULL );

    // register the keyboard and display callbacks, then enter the GLUT main loop
    glutKeyboardFunc( key_func );
    glutDisplayFunc( draw_func );
    glutMainLoop();
}

Welcome to Stack Overflow! I see you have asked quite a few questions but you have not accepted any answers. You will have better luck getting people to answer your questions if you accept helpful answers. — harrism, Sep 19 '12 at 00:34
I don't know what you mean and I don't know how to accept answers. If I have any trouble I put my queries in this forum and people help me to solve those. If my problem gets solved then I write below that my problem got solved so that they don't waste any more time in that same problem and I thank them. Is there anything else that I should do? Please let me know what else should I do and how should I do it and I will surely do that — duttasankha, Sep 19 '12 at 00:44
Please see [this answer in the StackOverflow FAQ](http://stackoverflow.com/faq#howtoask), particularly the part about voting up good answers and how to accept them (hint, click the check mark). In general giving the whole FAQ a quick read is helpful. — harrism, Sep 19 '12 at 02:27
Cleanly install the libraries and redo each compilation process. I think you should provide more information about the code or your setup. — phoad, Sep 19 '12 at 10:18
I will go through the FAQ and I will accept the answers for my previous posts. I am extremely sorry about this as I didn't know about this thing. I will do it as soon as possible. Thanks. @phoad : I will edit my post above to provide the details of my code. Thanks. — duttasankha, Sep 19 '12 at 17:49
To find in which position your code gives segmentation fault, you should try to debug your code. If you are in Linux you might generate a core dump file and inspect it with gdb. Try to check each step of your code i.e. after the kernel call place a cudaThreadSynchronize and check for errors. Your code is too long and too complex at least for me to understand your problem.. — phoad, Sep 19 '12 at 20:15
@phoad : Thanks for your reply. I am on Ubuntu Linux right now. I will try to give you more information if what I have already provided is not helpful enough. But most of the time I face the problem that if I am late in replying, nobody gets back to the topic and the post remains unanswered. This happens because I am absolutely new to the Linux environment, so my replies might take some time. Shall I make another topic to get an answer or shall I continue this post? Thanks again, I will get back to you after carrying out the task that you mentioned. — duttasankha, Sep 19 '12 at 23:37
You can generate core dumps on Linux. At the console, run the command "ulimit -c 1000000". Build your executable with the "-g" option so that it contains debug information, then run it. When it crashes it will generate a core dump. Then run the command "gdb ./myexe core.dump". This should show you the line that caused the core dump. You may also use Eclipse to trace your code in debug mode and find the line; then share this line information with us. http://stackoverflow.com/questions/5115613/core-dump-file-analysis — phoad, Sep 20 '12 at 07:33
Hi! Thank you so much for your reply. Yesterday I actually downloaded and made eclipse ready for cuda and openGL. I tested simple Cuda and openGL code separately and they are running well. But when I wrote the above code which is cuda with openGL then it compiled without any error but when I ran the code nothing is coming up, like there is no output. I will today run the code in debug mode and give you the reply within few hours. Thanks again. — duttasankha, Sep 20 '12 at 15:10

score 1 · Accepted Answer · answered Sep 25 '12 at 02:45

You had several critical lines from the original book example commented out in your code. Why you did this, I don't know, but the net effect is that you were making certain OpenGL calls (glGenBuffers was where the seg fault was coming from) through function pointers that were still NULL, because the lines that look up their addresses were the ones commented out.

Uncommenting these 4 lines revealed that the "GET_PROC_ADDRESS" macro was missing from your code and headers. This led me to discover that you didn't include the original book headers properly. Your code also contains some garbage, such as @harrism in various places.

Anyway, I took your code and added some things to it; the following works for me:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <GL/gl.h>
#include <GL/glut.h>
#include <cuda.h>
#include <cuda_runtime.h>
// #include "cutil.h"
#include <cuda_gl_interop.h>

#include <GL/glext.h>
#include <GL/glx.h>
// Looks up an OpenGL entry point by name through GLX; the name string is
// cast to the GLubyte pointer type that glXGetProcAddress takes.
#define GET_PROC_ADDRESS( str ) glXGetProcAddress( (const GLubyte *)str )

// Reports a failed CUDA runtime call and terminates the program.
//   err  - status returned by the CUDA call
//   file - source file of the call site (supplied by HANDLE_ERROR)
//   line - source line of the call site (supplied by HANDLE_ERROR)
// Does nothing when err is cudaSuccess; otherwise prints the error string
// and exits with EXIT_FAILURE.
static void HandleError( cudaError_t err, const char *file,  int line ) {
    if (err != cudaSuccess) {
        // diagnostics belong on stderr so they are not mixed into (or lost
        // with) redirected standard output
        fprintf( stderr, "%s in %s at line %d\n",
                 cudaGetErrorString( err ), file, line );
        exit( EXIT_FAILURE );
    }
}
// Wraps a CUDA runtime call so that a failure is reported with its call site.
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))



// OpenGL buffer-object entry points, held as function pointers.
// They start out as NULL and are assigned in main() via GET_PROC_ADDRESS.
PFNGLBINDBUFFERARBPROC    glBindBuffer     = NULL;
PFNGLDELETEBUFFERSARBPROC glDeleteBuffers  = NULL;
PFNGLGENBUFFERSARBPROC    glGenBuffers     = NULL;
PFNGLBUFFERDATAARBPROC    glBufferData     = NULL;

// Width and height, in pixels, of the window and of the generated image.
#define     DIM    512

// OpenGL buffer object name, and the CUDA graphics resource registered for it.
GLuint  bufferObj;
cudaGraphicsResource *resource;

// Fills a DIM x DIM RGBA image with a green sine pattern, one pixel per thread.
//   ptr - device pointer to at least DIM * DIM uchar4 pixels, stored row by
//         row with DIM pixels per row
// Launch layout: a 2D grid of 2D blocks covering at least DIM x DIM threads.
// Threads that fall outside the image do nothing, so the grid may overshoot.
// No shared memory is used.
__global__ void kernel( uchar4 *ptr ) {
    // map from threadIdx/blockIdx to pixel position
    int x = threadIdx.x + blockIdx.x * blockDim.x;
    int y = threadIdx.y + blockIdx.y * blockDim.y;

    // the buffer holds exactly DIM x DIM pixels: never write past it, whatever
    // launch configuration the caller picked
    if (x >= DIM || y >= DIM)
        return;

    // rows of the image are DIM pixels long, independent of the launch width
    int offset = x + y * DIM;

    // now calculate the value at that position (single-precision throughout)
    float fx = x/(float)DIM - 0.5f;
    float fy = y/(float)DIM - 0.5f;
    unsigned char   green = 128 + 127 *
                            sinf( fabsf(fx*100.0f) - fabsf(fy*100.0f) );

    // write the whole pixel at once: R = 0, G = green, B = 0, A = 255
    ptr[offset] = make_uchar4( 0, green, 0, 255 );
}

// GLUT keyboard callback. Pressing Escape tears down the CUDA/OpenGL
// interop objects and ends the program; all other keys are ignored.
// The pointer coordinates x and y are not used.
static void key_func( unsigned char key, int x, int y ) {
    const unsigned char escapeCode = 27;   // ASCII code of the Escape key

    if (key == escapeCode) {
        // drop the CUDA registration before the GL buffer itself goes away
        HANDLE_ERROR( cudaGraphicsUnregisterResource( resource ) );
        glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, 0 );
        glDeleteBuffers( 1, &bufferObj );
        exit(0);
    }
}

// GLUT display callback: draws the DIM x DIM RGBA image, then swaps buffers.
static void draw_func( void ) {
    // Passing 0 as the data argument: main() leaves bufferObj bound to
    // GL_PIXEL_UNPACK_BUFFER_ARB, so the pixels are expected to be read from
    // the beginning of that buffer object.
    const GLvoid *bufferOffset = 0;

    glDrawPixels( DIM, DIM, GL_RGBA, GL_UNSIGNED_BYTE, bufferOffset );
    glutSwapBuffers();
}


// Program entry point.
// Selects a CUDA device, creates a GLUT window, loads the OpenGL buffer-object
// entry points, creates a DIM x DIM RGBA pixel buffer, lets the CUDA kernel
// fill it once through the graphics-interop API, and enters the GLUT main loop.
// Every CUDA call is checked with HANDLE_ERROR, which exits on failure.
int main( int argc, char **argv ) {
    cudaDeviceProp  prop;
    int dev;

    // choose a device from the requested properties (only major = 1 and
    // minor = 0 are set; every other field is zeroed)
    memset( &prop, 0, sizeof( cudaDeviceProp ) );
    prop.major = 1;
    prop.minor = 0;
    HANDLE_ERROR( cudaChooseDevice( &dev, &prop ) );

    // tell the CUDA runtime that this device will be used for OpenGL interop
    // NOTE(review): later CUDA releases mark cudaGLSetGLDevice as deprecated;
    // confirm against the toolkit version in use
    HANDLE_ERROR( cudaGLSetGLDevice( dev ) );

    // create a double-buffered RGBA window of DIM x DIM pixels
    glutInit( &argc, argv );
    glutInitDisplayMode( GLUT_DOUBLE | GLUT_RGBA );
    glutInitWindowSize( DIM, DIM );
    glutCreateWindow( "bitmap" );

    // load the buffer-object entry points; the pointers are NULL until here
    glBindBuffer    = (PFNGLBINDBUFFERARBPROC)GET_PROC_ADDRESS("glBindBuffer");
    glDeleteBuffers = (PFNGLDELETEBUFFERSARBPROC)GET_PROC_ADDRESS("glDeleteBuffers");
    glGenBuffers    = (PFNGLGENBUFFERSARBPROC)GET_PROC_ADDRESS("glGenBuffers");
    glBufferData    = (PFNGLBUFFERDATAARBPROC)GET_PROC_ADDRESS("glBufferData");

    // calling through a NULL pointer would crash with a bare segmentation
    // fault; fail with a readable message instead
    if (!glBindBuffer || !glDeleteBuffers || !glGenBuffers || !glBufferData) {
        fprintf( stderr, "failed to load OpenGL buffer object functions\n" );
        exit( EXIT_FAILURE );
    }

    // create a pixel-unpack buffer of DIM * DIM * 4 bytes with no initial data
    glGenBuffers( 1, &bufferObj );
    glBindBuffer( GL_PIXEL_UNPACK_BUFFER_ARB, bufferObj );
    glBufferData( GL_PIXEL_UNPACK_BUFFER_ARB, DIM * DIM * 4,
                  NULL, GL_DYNAMIC_DRAW_ARB );

    // register the GL buffer with CUDA
    HANDLE_ERROR( cudaGraphicsGLRegisterBuffer( &resource,
                                                bufferObj,
                                                cudaGraphicsMapFlagsNone ) );

    // map the resource and fetch the device pointer to its storage
    HANDLE_ERROR( cudaGraphicsMapResources( 1, &resource, NULL ) );
    uchar4* devPtr;
    size_t  size;
    HANDLE_ERROR( cudaGraphicsResourceGetMappedPointer( (void**)&devPtr,
                                                        &size,
                                                        resource ) );

    // one thread per pixel: (DIM/16) x (DIM/16) blocks of 16 x 16 threads
    dim3    grids(DIM/16,DIM/16);
    dim3    threads(16,16);
    kernel<<<grids,threads>>>( devPtr );
    // a kernel launch returns no status itself; launch errors show up here
    HANDLE_ERROR( cudaGetLastError() );

    // hand the buffer back to OpenGL
    HANDLE_ERROR( cudaGraphicsUnmapResources( 1, &resource, NULL ) );

    // set up GLUT and kick off main loop
    glutKeyboardFunc( key_func );
    glutDisplayFunc( draw_func );
    glutMainLoop();
    return 0;
}

Segmentation fault while running cuda programs with OpenGL

1 Answer