I am trying to call a functor from a Cuda Kernel. The functor is given by the programmer and my library uses it to perform some functions and returns the processed array.
Since the functor is in Host Memory Space, I am Copying the object to Device and using the functor in my kernel call.
Error : It says the Functor operator() is inaccessible from the kernel.'
I cannot understand where I am Wrong.
Note : Full Error Message dumped At the end.
Here is the Full Code :
#include <cstdio>
using namespace std;
class my_functor
{
__device__
int operator() (int x)
{
return x*10;
}
};
template <class T,typename Func>
__global__
void for_each_kernel (T* d_v,int N,Func f)
{
int idx = blockIdx.x*blockDim.x + threadIdx.x;
int num_threads = gridDim.x * blockDim.x;
__shared__ T s_x[1024];
for(int i = idx; i < N; i += num_threads)
{
s_x[threadIdx.x] = d_v[i];
// Call The Functor Here
s_x[threadIdx.x] = f (s_x[threadIdx.x]);
d_v[i] = s_x[threadIdx.x];
}
}
template <class T>
class device_vector
{
T *d_v;
int numEle;
public :
device_vector (T *h_v,int N)
{
cudaMalloc ((T**)&d_v,N*sizeof(T));
cudaMemcpy(d_v, h_v, N * sizeof(T), cudaMemcpyHostToDevice);
numEle = N;
}
void set (T data,int index)
{
cudaMemcpy (&d_v[index],&data,sizeof(T),cudaMemcpyHostToDevice);
}
T get (int index)
{
T temp;
cudaMemcpy (&temp,&d_v[index],sizeof(T),cudaMemcpyDeviceToHost);
return temp;
}
// Only Provide Start And End Vertices Fot Which you Want To Do Some Operation
template <typename Func>
void for_each (int start,int end,Func f)
{
Func *new_func;
cudaMalloc (&new_func,sizeof(my_functor));
cudaMemcpy (new_func,&f,sizeof (my_functor),cudaMemcpyHostToDevice);
for_each_kernel<<<26,1024>>> (d_v,end-start+1,*new_func);
}
};
int a[1<<28];
int main ()
{
int N = 1<<28;
my_functor functor;
for (int i=0;i<N;i++)
a[i] = i;
device_vector<int> d (a,N);
d.for_each (0,N-1,functor);
printf ("Getting Element At Index %d : %d \n",100,d.get(100));
return 0;
}
Error Message Dump :
device_vector.cu(40): error: function "my_functor::operator()"
(18): here is inaccessible
detected during:
instantiation of "void for_each_kernel(T *, int, Func) [with T=int, Func=my_functor]"
(107): here
instantiation of "void device_vector<T>::for_each(int, int, Func) [with T=int, Func=my_functor]"
(125): here
1 error detected in the compilation of "/tmp/tmpxft_00005da2_00000000-9_device_vector.cpp1.ii".