-1

I would like to use function pointers in my CUDA C++ code, like below:

typedef __device__ void customFunc(const char*, uint64_t, char*, const uint64_t);

which is what I'm after. Its equivalent without "__device__" does work perfectly well.

Are function pointers supported in Cuda?

Edit:

I'm specifically interested in how to pass __device__ functions as function pointers to other __device__ functions.

talonmies
  • 70,661
  • 34
  • 192
  • 269
WhiteFlowers
  • 43
  • 11

1 Answer

2

There is no magic involved in using device function pointers in device code. It is functionally and syntactically identical to standard C++.

For example:

#include <cstdio>

// Pointer to a function taking one int and returning int. No CUDA qualifier
// appears in the type itself.
typedef int (*ufunc)(int);

// Device helper: doubles x, reports the argument and the result through
// device-side printf, and returns the doubled value.
__device__ int f1(int x)
{
    const int doubled = x * 2;
    printf("f1 arg = %d, res = %d\n", x, doubled);
    return doubled;
}

// Device helper: invokes the callback `op` on y, adds x to what it returns,
// reports the inputs and the sum through device-side printf, and returns the sum.
__device__ int f2(int x, int y, ufunc op)
{
    const int applied = op(y);
    const int total = x + applied;
    printf("f2 arg = %d, %d, res = %d\n", x, y, total);
    return total;
}


// Kernel: every thread calls f2 with its thread index and block index, passing
// the address of f1 as the callback, and stores the result in z at its flat
// index (blockIdx.x * blockDim.x + threadIdx.x). Only the x dimensions are
// used and there is no bounds check, so z must hold at least
// gridDim.x * blockDim.x ints.
__global__ void kernel(int *z)
{
    const int lane = threadIdx.x;
    const int block = blockIdx.x;
    const int slot = blockIdx.x * blockDim.x + threadIdx.x;

    z[slot] = f2(lane, block, &f1);
}

// Host entry point: allocates one int per launched thread, runs `kernel` with
// nb blocks of nt threads, waits for completion (which also flushes the
// device-side printf output), frees the buffer and resets the device.
// Returns 0 on success and 1 if any CUDA call reports an error.
int main()
{
    const int nt = 4, nb = 4;
    int* a_d = NULL;

    // The kernel writes ints, so the buffer is sized in units of sizeof(int).
    cudaError_t err = cudaMalloc(&a_d, sizeof(int) * nt * nb);
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(err));
        return 1;
    }

    kernel<<<nb, nt>>>(a_d);

    // A launch returns no status itself: configuration errors are reported by
    // cudaGetLastError(), execution errors by the following synchronize.
    err = cudaGetLastError();
    if (err == cudaSuccess) {
        err = cudaDeviceSynchronize();
    }
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(err));
    }

    // Release the allocation before tearing the device context down.
    cudaFree(a_d);
    cudaDeviceReset();

    return err == cudaSuccess ? 0 : 1;
}
#include <cstdio>

// Function-pointer type: takes a single int, returns int. The type carries no
// CUDA qualifier.
typedef int (*bfunc)(int);

// Device function that returns twice its argument, logging both the argument
// and the result with device-side printf before returning.
__device__ int f1(int x)
{
    const int twice = x * 2;
    printf("f1 arg = %d, res = %d\n", x, twice);
    return twice;
}

// Device function that adds x to the value produced by calling the callback
// `op` on y, logs the inputs and the sum with device-side printf, and returns
// the sum.
//
// The callback is invoked through the pointer argument rather than by naming a
// specific function, so the caller decides which function is applied to y.
__device__ int f2(int x, int y, bfunc op)
{
    int res = x + op(y);
    printf("f2 arg = %d, %d, res = %d\n", x, y, res);
    return res;
}


// Kernel: each thread passes its thread index, its block index and the address
// of f1 to f2, then writes the returned value to z at
// threadIdx.x + blockDim.x * blockIdx.x. There is no bounds check, so z must
// provide one int for every thread of the (x-only) launch.
__global__ void kernel(int *z)
{
    const int threadInBlock = threadIdx.x;
    const int blockInGrid = blockIdx.x;
    const int globalIdx = threadInBlock + blockDim.x * blockInGrid;

    const int value = f2(threadInBlock, blockInGrid, &f1);
    z[globalIdx] = value;
}

// Host entry point: allocates nt * nb ints on the device, launches `kernel`
// with nb blocks of nt threads, synchronizes (which also flushes device-side
// printf output), then frees the buffer and resets the device.
// Returns 0 on success and 1 if any CUDA call reports an error.
int main()
{
    const int nt = 4, nb = 4;
    int* a_d = NULL;

    // Size the allocation by the element type the kernel actually writes (int).
    cudaError_t status = cudaMalloc(&a_d, sizeof(int) * nt * nb);
    if (status != cudaSuccess) {
        fprintf(stderr, "cudaMalloc failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    kernel<<<nb, nt>>>(a_d);

    // Launch-configuration errors are only visible through cudaGetLastError();
    // faults during execution are reported by the synchronizing call.
    status = cudaGetLastError();
    if (status == cudaSuccess) {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess) {
        fprintf(stderr, "kernel failed: %s\n", cudaGetErrorString(status));
    }

    // Free the buffer before the device context is destroyed.
    cudaFree(a_d);
    cudaDeviceReset();

    return status == cudaSuccess ? 0 : 1;
}

Here, we define a simple pointer to a unary functor as a type, and then a device function which takes that type as an argument. The static assignment of the function pointer within the kernel call is handled at compile time and everything works. If you want to have function pointer selection happen at run time, then you need to follow the instructions given in the link you were already provided with.

The important thing to keep in mind here is that in CUDA it is not legal to include CUDA specifiers (__device__, __constant__, __global__, etc) in type definitions. Each variable instance has a specifier as part of its definition.

Community
  • 1
  • 1
talonmies
  • 70,661
  • 34
  • 192
  • 269