I've been trying to transform some CUDA/C code into more object-oriented code, but my goal doesn't seem easy to achieve with my current understanding of how CUDA works. I haven't been able to find a good explanation of this situation either. It might not be possible after all.
I have a global object of class myClass holding an array to be filled in a kernel.
How should the methods in myClass be defined so that the array and boolean members are visible from device and the array can then be copied back to host? I am using cuda 7.5 and the compute capability of my card is 3.5.
This is a tentative structure describing the situation:
#include <cstdio>
#include <cuda.h>
#include <cuda_runtime.h>
// Bundles a flag, an element count and a pointer to an int buffer. Instances
// are passed by value to myKernel, which calls dosomething() on them.
// NOTE(review): no copy constructor or assignment operator is declared, so
// every copy (including the by-value kernel argument) carries the same `data`
// pointer that the destructor releases — confirm the intended ownership.
class myClass
{
public:
bool bool_var; // Set from host via setValues(); read in myKernel and in the destructor
int data_size; // Element count; set from host via setValues(), read in myKernel and dosomething()
// Constructor and destructor are compiled for both host and device.
__device__ __host__ myClass();
__device__ __host__ ~myClass();
// Host-only setter for bool_var and data_size.
__host__ void setValues(bool iftrue, int size);
// Device-only: atomically adds (device_parameter + data_size) to data[device_parameter].
__device__ void dosomething(int device_parameter);
// NOTE(review): `export` is a C++ keyword, so this declaration will not
// compile as written; the method needs another name here and at its call
// site. No definition is visible in this file.
__host__ void export();
// Declared but not defined in the visible source. Presumably they allocate or
// upload the device buffer and copy it back to the host — TODO implement.
__host__ void prepareDeviceObj();
__host__ void retrieveDataToHost();
private:
int *data; // Buffer updated on the device by dosomething(); intended to be shared between threads and copied back to host for data output
};
// Default constructor (host and device).
// Puts every member into a defined state: flag cleared, zero elements, no
// buffer. The destructor reads bool_var and releases data, so leaving them
// uninitialised would make it free an indeterminate pointer for any object
// that is not zero-initialised static storage.
__host__ __device__ myClass::myClass()
    : bool_var(false), data_size(0), data(NULL)
{
}
// Destructor (host and device).
// The buffer is released only in host code. __CUDACC__ is defined for the
// whole translation unit whenever nvcc compiles it (host and device passes
// alike), so it cannot tell the two passes apart; __CUDA_ARCH__ is defined
// only while device code is being compiled. Guarding on it keeps cudaFree out
// of the device version of this destructor.
// Host behaviour is unchanged: under nvcc the buffer is released with cudaFree
// when bool_var is set; with a plain host compiler it is released with free.
// NOTE(review): myKernel takes myClass by value and the class declares no copy
// constructor, so host-side copies share `data`; each copy's destructor would
// release the same buffer — confirm ownership before relying on this.
__host__ __device__ myClass::~myClass()
{
#ifndef __CUDA_ARCH__
#ifdef __CUDACC__
    if (bool_var)
        cudaFree(data);
#else
    free(data);
#endif
#endif
}
// Host-side setter: records the element count and the flag that myKernel
// and the destructor later consult.
//   iftrue - value stored in bool_var
//   size   - value stored in data_size
__host__ void myClass::setValues(bool iftrue, int size)
{
    this->data_size = size;
    this->bool_var = iftrue;
}
// Device-side update of a single slot: atomically adds (idx + data_size) to
// data[idx]. No bounds check is performed here; the caller supplies idx.
// The buffer behind `data` is meant to be the same one for all threads.
__device__ void myClass::dosomething(int idx)
{
    atomicAdd(data + idx, data_size + idx);
}
// Kernel using a 1D global index (blockIdx.x * blockDim.x + threadIdx.x).
// Threads whose index is >= obj.data_size do nothing. Every other thread
// prints a status message and, when obj.bool_var is set, calls
// obj.dosomething() with its own index.
//   obj - received by value
__global__ void myKernel(myClass obj)
{
    const int tid = threadIdx.x + blockDim.x * blockIdx.x;

    // Surplus threads beyond the element count have nothing to do.
    if (tid >= obj.data_size)
        return;

    if (obj.bool_var)
    {
        printf("Object is ready!");
        obj.dosomething(tid);
        return;
    }

    printf("Object is not up to any task here!");
}
// File-scope instance: configured in main via setValues() and passed by value
// to myKernel.
myClass globalInstance;
// Entry point: configures globalInstance, launches myKernel over some_number
// elements, checks the launch and the kernel execution for errors, then asks
// the object to bring its data back and export it.
// Returns 0 on success and 1 if the launch or the kernel execution failed.
int main(int argc, char** argv)
{
    (void)argc;
    (void)argv;

    const int some_number = 40;
    globalInstance.setValues(true, some_number);
    globalInstance.prepareDeviceObj(); // unknown

    // Ceil-divide so that any element count is covered; myKernel bounds-checks
    // its index, so surplus threads in the last block are harmless.
    const int threads_per_block = 128;
    const int blocks = (some_number + threads_per_block - 1) / threads_per_block;
    myKernel<<<blocks, threads_per_block>>>(globalInstance); // how to pass the object?

    // A launch returns no status itself: configuration errors are reported by
    // cudaGetLastError(), faults during execution by the next synchronising call.
    cudaError_t status = cudaGetLastError();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "myKernel launch failed: %s\n", cudaGetErrorString(status));
        return 1;
    }
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess)
    {
        fprintf(stderr, "myKernel execution failed: %s\n", cudaGetErrorString(status));
        return 1;
    }

    globalInstance.retrieveDataToHost(); // unknown
    // NOTE(review): `export` is a C++ keyword, so this call cannot compile until
    // the method is given another name in myClass and here.
    globalInstance.export();
    return 0;
}