1

I am trying to call a functor from a CUDA kernel. The functor is supplied by the programmer; my library uses it to process an array and returns the processed array.

Since the functor is in host memory, I am copying the object to the device and using that copy in my kernel call.

Error: the compiler says the functor's operator() is inaccessible from the kernel.

I cannot understand where I am going wrong.

Note: the full error message is dumped at the end.

Here is the Full Code :

#include <cstdio>

using namespace std;

// User-supplied functor: multiplies its integer argument by 10 in device code.
// NOTE: no access specifier is given, and members of a `class` are private by
// default, so operator() below is private to my_functor.
class my_functor
{

        // __device__: compiled for, and callable from, device code.
        __device__
        int operator() (int x)
        {
                return x*10;
        }
};



// Applies the functor f in place to each of the first N elements of d_v:
// d_v[i] = f(d_v[i]) for 0 <= i < N.
//
// Launch layout: 1-D grid of 1-D blocks. Any grid/block size is valid because
// each thread starts at its global index and advances by the total number of
// launched threads until it passes N.
// Shared memory: none. Each thread only ever touches its own element, so the
// value is staged in a per-thread local instead of a fixed-size __shared__
// array (which would cap the usable sizeof(T) and the block size).
//
// d_v : device pointer to at least N elements of T.
// N   : number of elements to process; N <= 0 processes nothing.
// f   : functor, passed by value; must provide a __device__ operator()
//       accepting a T and returning something assignable to T.
template <class T,typename Func>
    __global__
void for_each_kernel (T* d_v,int N,Func f)
{
    // 64-bit arithmetic so that i += stride cannot overflow when N is close to INT_MAX.
    const long long first  = (long long) blockIdx.x * blockDim.x + threadIdx.x;
    const long long stride = (long long) gridDim.x * blockDim.x;

    for (long long i = first; i < N; i += stride)
    {
        // Work on a private copy so the functor never sees a reference into d_v.
        T value = d_v[i];

        // Call The Functor Here
        d_v[i] = f (value);
    }
}

// Thin wrapper around a device array of T that is filled from a host array at
// construction time and offers element access and an in-place for_each.
//
// NOTE(review): the device allocation is never released (there is no
// destructor). Adding one safely also requires defining or disabling copying,
// which would change the class's interface, so it is left for a follow-up.
template <class T>
class device_vector
{
    T *d_v;       // device pointer to the element storage (0 if allocation failed)
    int numEle;   // number of elements held in d_v

    // Reports a failed CUDA runtime call on stderr.
    // Returns true when status is cudaSuccess, false otherwise.
    static bool check (cudaError_t status,const char *what)
    {
        if (status == cudaSuccess)
            return true;

        fprintf (stderr,"device_vector: %s failed: %s\n",what,cudaGetErrorString (status));
        return false;
    }

    public :

    // Allocates device storage for N elements and copies them from the host
    // array h_v. On allocation failure the vector is left empty (numEle == 0).
    device_vector (T *h_v,int N)
    {
        d_v = 0;
        numEle = 0;

        if (!check (cudaMalloc ((void**)&d_v,N*sizeof(T)),"cudaMalloc"))
        {
            d_v = 0;
            return;
        }

        numEle = N;
        check (cudaMemcpy (d_v,h_v,N*sizeof(T),cudaMemcpyHostToDevice),"cudaMemcpy (host to device)");
    }

    // Overwrites the element at position index with data (blocking copy).
    // index is not range-checked.
    void set (T data,int index)
    {
        check (cudaMemcpy (&d_v[index],&data,sizeof(T),cudaMemcpyHostToDevice),"cudaMemcpy (set)");
    }


    // Returns a host copy of the element at position index (blocking copy).
    // index is not range-checked. If the copy fails, a value-initialized T is returned.
    T get (int index)
    {
        T temp = T();
        check (cudaMemcpy (&temp,&d_v[index],sizeof(T),cudaMemcpyDeviceToHost),"cudaMemcpy (get)");
        return temp;
    }


    // Applies f in place to the elements at positions start..end (both inclusive).
    // Only provide the start and end indices for which you want to do some operation.
    //
    // f is handed to the kernel by value: kernel arguments are copied to the
    // device as part of the launch, so no explicit cudaMalloc/cudaMemcpy of the
    // functor is needed (and a device pointer must never be dereferenced on the host).
    // An empty range (end < start) is a no-op; a range outside the vector is
    // rejected with a message on stderr.
    template <typename Func>
        void for_each (int start,int end,Func f)
        {
            if (end < start)
                return;

            if (start < 0 || end >= numEle)
            {
                fprintf (stderr,"device_vector: for_each range [%d, %d] is outside [0, %d)\n",start,end,numEle);
                return;
            }

            // Offset by start so that exactly the requested range is processed.
            for_each_kernel<<<26,1024>>> (d_v + start,end-start+1,f);

            // A kernel launch returns nothing; launch-configuration errors surface here.
            check (cudaGetLastError (),"for_each_kernel launch");
        }

};


// Host-side input array of 2^28 ints, declared at file scope (static storage).
int a[1<<28];

// Demo driver: fills the host array with a[k] == k, uploads it into a
// device_vector, runs my_functor over every element and prints element 100.
int main ()
{
    const int count = 1<<28;
    const int probe = 100;   // index of the element printed at the end

    my_functor op;

    // Initialise the host data.
    int k = 0;
    while (k < count)
    {
        a[k] = k;
        ++k;
    }

    // Upload the data, then transform the full inclusive range [0, count-1].
    device_vector<int> dev (a,count);
    dev.for_each (0,count-1,op);

    printf ("Getting Element At Index %d : %d \n",probe,dev.get(probe));

    return 0;
}

Error Message Dump :

device_vector.cu(40): error: function "my_functor::operator()"
(18): here is inaccessible
          detected during:
            instantiation of "void for_each_kernel(T *, int, Func) [with T=int, Func=my_functor]" 
(107): here
            instantiation of "void device_vector<T>::for_each(int, int, Func) [with T=int, Func=my_functor]" 
(125): here

1 error detected in the compilation of "/tmp/tmpxft_00005da2_00000000-9_device_vector.cpp1.ii".
Rohith R
  • 1,309
  • 2
  • 17
  • 36
  • @talonmies is this a better way to put forth my problem....? – Rohith R Feb 19 '16 at 06:29
  • You should really learn more about C++ before you proceed to an advanced topics, such as GPU-accelerated computing. Check out this thread: [The Definitive C++ Book Guide and List](http://stackoverflow.com/questions/388242/the-definitive-c-book-guide-and-list) – Ivan Aksamentov - Drop Feb 19 '16 at 16:37

1 Answers1

1

You are getting the inaccessible error because my_functor is a class. Class members are, by default, private. If you change your definition of my_functor like this:

// Functor that multiplies its integer argument by 10 in device code.
class my_functor
{
        // Explicit public access: without it, members of a `class` default to private.
        public:
        // __device__: compiled for, and callable from, device code.
        __device__
        int operator() (int x)
        {
                return x*10;
        }
};

or change it to a struct (note struct members are public by default):

// Functor that multiplies its integer argument by 10 in device code.
// Declared as a `struct`, so its members are public by default.
struct my_functor
{

        // __device__: compiled for, and callable from, device code.
        __device__
        int operator() (int x)
        {
                return x*10;
        }
};

you might find the code compiles. There are possibly other things wrong with the code, but either of these modifications should remove the source of that particular compilation error.

talonmies
  • 70,661
  • 34
  • 192
  • 269