0

I have this (working) CPU code:

/* Number of function pointers stored in each DATAMPOT entry. */
#define NF 3
/* Number of DATAMPOT entries; set in main(). */
int ND;

/* Pointer to a function taking two doubles and returning a double. */
typedef double (*POT)(double x, double y);

/* One table holding NF function pointers of type POT. */
typedef struct {
    POT pot[NF];
} DATAMPOT;

/* Array of DATAMPOT entries; allocated in main(). */
DATAMPOT *datampot;

/* Functions whose addresses are stored in the tables (declared only here). */
double func0(double x, double y);
double func1(double x, double y);
double func2(double x, double y);


/*
 * Allocates ND DATAMPOT entries on the heap and stores the addresses of
 * func0, func1 and func2 in every entry.
 *
 * Returns 0 on success, 1 if the allocation fails.
 */
int main(void)
{
    int i;

    ND=5;
    datampot=(DATAMPOT *)malloc(ND*sizeof(DATAMPOT));
    if(!datampot){
        /* Without this check the loop below would write through a null pointer. */
        return 1;
    }

    for(i=0;i<ND;i++){
        datampot[i].pot[0]=func0;
        datampot[i].pot[1]=func1;
        datampot[i].pot[2]=func2;
    }

    /* Release the table before exiting and drop the dangling pointer. */
    free(datampot);
    datampot=0;

    return 0;
}

Now I am trying a GPU version like this:

// Number of function pointers stored in each DATAMPOT entry.
#define NF 3
// Number of DATAMPOT entries; set in main().
int ND;

// Pointer to a function taking two doubles and returning a double.
typedef double (*POT)(double x, double y);

// One table holding NF function pointers of type POT.
typedef struct {
    POT pot[NF];
} DATAMPOT;

// Pointer to the array of DATAMPOT entries obtained from cudaMalloc() in main().
DATAMPOT *dev_datampot;

// Device functions whose addresses are stored in the tables (declared only here).
__device__ double z_func0(double x, double y);
__device__ double z_func1(double x, double y);
__device__ double z_func2(double x, double y);

// Stores the addresses of z_func0, z_func1 and z_func2 in each of the n
// DATAMPOT entries starting at dmp. The addresses are taken in device code.
//
// dmp: array of at least n DATAMPOT entries, writable from the device.
// n:   number of entries to fill; nothing is written when n <= 0.
//
// Launch layout: any 1D grid/block configuration. Entries are split across
// the launched threads, so a <<<1,1>>> launch still fills every entry in
// order, while wider launches no longer make every thread rewrite them all.
__global__ void assign(DATAMPOT *dmp, int n)
{
    // 64-bit indices so the grid-wide stride cannot overflow an int.
    const long long stride=(long long)gridDim.x*blockDim.x;

    for(long long i=(long long)blockIdx.x*blockDim.x+threadIdx.x;i<n;i+=stride){
        dmp[i].pot[0]=z_func0;
        dmp[i].pot[1]=z_func1;
        dmp[i].pot[2]=z_func2;
    }
}

// Allocates ND DATAMPOT entries on the device, launches assign() to fill them
// with device function pointers, and waits for the kernel to finish.
//
// Returns 0 on success, 1 if any CUDA call or the kernel itself fails.
int main(void)
{
    cudaError_t err;

    ND=5;
    err=cudaMalloc((void**)&dev_datampot,ND*sizeof(DATAMPOT));
    if(err!=cudaSuccess)
        return 1;

    assign<<<1,1>>>(dev_datampot,ND);

    // A kernel launch returns no status; launch failures are reported by
    // cudaGetLastError().
    err=cudaGetLastError();

    // The launch is asynchronous: wait for the kernel so that errors raised
    // while it runs are seen before the program exits.
    if(err==cudaSuccess)
        err=cudaDeviceSynchronize();

    cudaFree(dev_datampot);
    dev_datampot=0;

    return err==cudaSuccess ? 0 : 1;
}

but the assignment of device function pointers does not work. Where is the mistake, and how can it be corrected? Thank you very much in advance. Michele

micheletuttafesta
  • 565
  • 2
  • 6
  • 11

3 Answers

1

According to the CUDA C Programming Guide,

D.2.4.3 Function Pointers

Function pointers to __global__ functions are supported in host code, but not in device code.

Function pointers to __device__ functions are only supported in device code compiled for devices of compute capability 2.x.

It is not allowed to take the address of a __device__ function in host code.

My guess is you're compiling for a compute capability which is lower than 2.0.

Community
  • 1
  • 1
Pedro
  • 1,344
  • 9
  • 17
  • I use a GeForce GTS 450 (compute capability 2.1). After calling `cudaMalloc((void**)&dev_datampot,ND*sizeof(DATAMPOT));`, is it possible to point the three function pointers in the member array `pot` at the device functions z_func0, z_func1 and z_func2? – micheletuttafesta Jun 19 '12 at 15:47
  • @micheletuttafesta: You would have to do that from within a device function, which is what you do in your example. Are you compiling for compute capability 2.0, e.g. with `-arch=sm_20`? – Pedro Jun 19 '12 at 16:01
  • Sorry for my very late answer, Pedro... yes, I compile with the -arch=sm_20 option. However, I may have found a solution to my question. I will write it up as soon as possible – micheletuttafesta Jun 21 '12 at 09:47
1

Hope this will help someone

// Number of function pointers stored in each DATAMPOT entry.
#define NF 3
// Number of DATAMPOT entries; set in main().
int ND;

// Pointer to a function taking two doubles and returning a double.
typedef double (*POT)(double x, double y);

// One table holding NF function pointers of type POT.
typedef struct {
    POT pot[NF];
} DATAMPOT;

// Pointer to the array of DATAMPOT entries obtained from cudaMalloc() in main().
DATAMPOT *dev_datampot;

// Device functions whose addresses are stored in the tables (declared only here).
__device__ double z_func0(double x, double y);
__device__ double z_func1(double x, double y);
__device__ double z_func2(double x, double y);

// __device__ variables initialized with the addresses of the device functions
// above; main() reads their values with cudaMemcpyFromSymbol().
__device__ POT z_func0_pointer=z_func0;  
__device__ POT z_func1_pointer=z_func1;
__device__ POT z_func2_pointer=z_func2;



// Fills ND device-side DATAMPOT entries with device function pointers without
// launching a kernel.
//
// The host cannot take the address of a __device__ function, so the addresses
// are read from the __device__ variables z_func0_pointer, z_func1_pointer and
// z_func2_pointer with cudaMemcpyFromSymbol(), assembled into one host-side
// DATAMPOT, and that entry is then copied into every slot of dev_datampot.
//
// Returns 0 on success, 1 if any CUDA call fails.
int main(void)
{
    int i;
    DATAMPOT host_entry;
    cudaError_t err;

    ND=5;
    err=cudaMalloc((void**)&dev_datampot,ND*sizeof(DATAMPOT));
    if(err!=cudaSuccess)
        return 1;

    // The three pointer values are the same for every entry, so fetch them
    // once instead of once per loop iteration.
    err=cudaMemcpyFromSymbol(&host_entry.pot[0],z_func0_pointer,sizeof(POT));
    if(err==cudaSuccess)
        err=cudaMemcpyFromSymbol(&host_entry.pot[1],z_func1_pointer,sizeof(POT));
    if(err==cudaSuccess)
        err=cudaMemcpyFromSymbol(&host_entry.pot[2],z_func2_pointer,sizeof(POT));

    // dev_datampot+i is only pointer arithmetic on the host; the device
    // memory itself is written by cudaMemcpy(). Stop at the first failure.
    for(i=0;err==cudaSuccess && i<ND;++i){
        err=cudaMemcpy(dev_datampot+i,&host_entry,sizeof(DATAMPOT),cudaMemcpyHostToDevice);
    }

    cudaFree(dev_datampot);
    dev_datampot=0;

    return err==cudaSuccess ? 0 : 1;
}
micheletuttafesta
  • 565
  • 2
  • 6
  • 11
0

What are your compiler options? On devices with compute capability 1.3 or lower, device functions must be inlined, so you cannot use device function pointers.

yyfn
  • 737
  • 4
  • 4