How do I use CUDA driver functions?

Question

I have a GUI application with a producer thread and an OpenGL thread. The OpenGL thread needs to call CUDA functions, and the producer needs to call cudaMemcpy etc.

No matter what I do I can't seem to get the CUDA driver API to work. Every time I try to use these functions I get a cudaErrorMissingConfiguration.

I want to use multi-threaded CUDA, what is the paradigmatic way to accomplish this?

Original

// Asker's original attempt at CUDA initialisation. It first goes through the
// runtime API (cudaSetDevice, cudaGLSetGLDevice, cudaFree) and then also
// through the driver API (cuInit, cuCtxCreate, cuCtxPopCurrent), printing the
// CUresult of each driver call and asserting on it.
void program::initCuda()
{
    // pctx is declared outside this function; it is reset to 0 here and then
    // passed unchanged to cuCtxCreate / cuCtxPopCurrent below.
    // NOTE(review): if pctx is a CUcontext*, those calls receive a null
    // pointer -- confirm against its declaration.
    CUresult a;pctx=0;
    // Runtime API: select device 0 for compute and for OpenGL interop.
    cudaSafeCall(cudaSetDevice(0));
    cudaSafeCall(cudaGLSetGLDevice(0));
    // Driver API initialisation, issued after the runtime calls above.
    a=cuInit(0);
    cudaSafeCall(cudaFree(0));
    // NOTE(review): a is a CUresult (driver API) but is compared against
    // cudaSuccess, a runtime API constant; the driver API constant is
    // CUDA_SUCCESS. The same applies to every assert in this function.
    cout <<"cuInit :" <<a << endl;assert(a == cudaSuccess);
    //a=cuCtxGetCurrent(pctx);
    // Explicit driver API context creation on top of the runtime API calls
    // above. The label printed on the next line still says "GetContext".
    a=cuCtxCreate(pctx,CU_CTX_SCHED_AUTO,0);
    cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
    //Fails with cudaErrorMissingConfiguration
    // NOTE(review): the value printed is a CUresult, so the number should
    // presumably be looked up in the CUresult enum rather than in
    // cudaError_t -- confirm.
    a=cuCtxPopCurrent(pctx);
    cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
    cout <<"Initialized CUDA" << endl;
}

Revised

// Asker's revised attempt: the explicit cuInit / cuCtxCreate calls are gone.
// The function selects device 0 through the runtime API, calls cudaFree(0),
// then queries and pops the current context through the driver API, printing
// and asserting on each CUresult.
void glStream::initCuda()
{
    CUresult a;
    // pctx is declared outside this function; it is reset to 0 here and then
    // passed unchanged to cuCtxGetCurrent / cuCtxPopCurrent below.
    // NOTE(review): if pctx is a CUcontext*, those calls receive a null
    // pointer -- confirm against its declaration.
    pctx=0;
    // Runtime API: select device 0 for compute and for OpenGL interop.
    cudaSafeCall(cudaSetDevice(0));
    cudaSafeCall(cudaGLSetGLDevice(0));
    // Unlike the two calls above, this return value is not wrapped in
    // cudaSafeCall.
    cudaFree(0);// From post http://stackoverflow.com/questions/10415204/how-to-create-a-cuda-context seems to indicate that `cudaSetDevice` should make a context.
    a=cuCtxGetCurrent(pctx);
    // NOTE(review): a is a CUresult (driver API) but is compared against
    // cudaSuccess, a runtime API constant; the driver API constant is
    // CUDA_SUCCESS. The same applies to the assert after cuCtxPopCurrent.
    cout <<"GetContext :" <<a << endl;assert(a == cudaSuccess);
    a=cuCtxPopCurrent(pctx);
    cout <<"cuCtxPopCurrent :" <<a << endl;assert(a == cudaSuccess);
    cout <<"Initialized CUDA" << endl;
}

Either use the driver API to create a context, or use the runtime API. But don't use *both*, which is what you have now. — talonmies, May 28 '13 at 11:51
@talonmies I made a revision to the post but it still doesn't help, I can't seem to help, it still reports error code `1`. — Mikhail, May 28 '13 at 15:26
Now you don't have any context establishment at all. `cudaSetDevice` does not establish a context. — talonmies, May 28 '13 at 15:32
@talonmies Okay I added a `cudaFree(0)`, but that doesn't seem to work. How do I actually create a context? Also this post http://stackoverflow.com/questions/10415204/how-to-create-a-cuda-context seems to indicate that `cudaSetDevice` should make a context. — Mikhail, May 28 '13 at 15:45
Perhaps you could look at one of the [cuda samples](http://docs.nvidia.com/cuda/cuda-samples/index.html) that uses the driver API, like [vectorAddDrv](http://docs.nvidia.com/cuda/cuda-samples/index.html#vector-addition-driver-api) to learn how to use the driver API. — Robert Crovella, May 28 '13 at 16:02

score 3 · Accepted Answer · answered May 28 '13 at 18:06

The simplest version of your second code should look like this:

#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>

int main(void)
{
    CUresult a;
    CUcontext pctx;
    cudaSetDevice(0); // runtime API creates context here
    a = cuCtxGetCurrent(&pctx);
    std::cout << "GetContext : " << a << std::endl;
    assert(a == CUDA_SUCCESS);
    a = cuCtxPopCurrent(&pctx);
    std::cout << "cuCtxPopCurrent : " << a << std::endl;
    assert(a == CUDA_SUCCESS);
    std::cout << "Initialized CUDA" << std::endl;

    return 0;
}

which yields the following on OS X 10.6 with CUDA 5.0:

$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
GetContext :0
cuCtxPopCurrent :0
Initialized CUDA

i.e. it "just works". Here the context is lazily initiated by the cudaSetDevice call. (Note that I incorrectly asserted that cudaSetDevice doesn't establish a context, but at least in CUDA 5 it appears to. This behaviour may have changed when the runtime API was revised in CUDA 4.)

Alternatively, you can use the driver API to initiate the context:

#include <iostream>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>

// Minimal driver-API-only example: initialises the driver API, looks up
// device 0, creates an explicit context on it and then pops that context.
// The CUresult of each step is captured in `a`, so the value that is printed
// and asserted on is the real status of the call just made.
int main(void)
{
    CUresult a;
    CUcontext pctx;
    CUdevice device;

    // cuInit has to succeed before the other driver calls can be used.
    a = cuInit(0);
    assert(a == CUDA_SUCCESS);

    a = cuDeviceGet(&device, 0);
    std::cout << "DeviceGet : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    a = cuCtxCreate(&pctx, CU_CTX_SCHED_AUTO, device); // explicit context here
    std::cout << "CtxCreate : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    a = cuCtxPopCurrent(&pctx);
    std::cout << "cuCtxPopCurrent : " << a << std::endl;
    assert(a == CUDA_SUCCESS);

    std::cout << "Initialized CUDA" << std::endl;

    return 0;
}

which also "just works":

$ g++ -I/usr/local/cuda/include -L/usr/local/cuda/lib driver.cc -lcuda -lcudart
$ ./a.out
DeviceGet : 0
CtxCreate : 0
cuCtxPopCurrent : 0
Initialized CUDA

What you shouldn't do is mix both as in your first example. All I can suggest is to try both of these and confirm they work for you, then adapt the call sequences to whatever it is you are actually trying to achieve.

How do I use CUDA driver functions?

1 Answer