I'm using CUDA C/C++; my CUDA compilation tools version is 7.0.
I have a struct and a class:
struct racer{
    bool active;
    int Distance;
    int currentPosition;
};
The class:
class Game{
public:
    vector<racer> racersVector;
    bool runTimeStep();
};
And I have a member function that modifies "racersVector":
bool Game::runTimeStep(){
    // this is 1 timestep; this is the part of the code to be run on the GPU
    // with "racersVector.size()" blocks/threads in parallel
    //-----------------------
    for (int j = 0; j < racersVector.size(); j++){
        racersVector[j].currentPosition++;
        if (racersVector[j].currentPosition >= racersVector[j].Distance)
            racersVector[j].active = false;
    }
    //-----------------------
    return true;
}
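To be clear about what I want each thread to do, here is my rough sketch of the kernel I have in mind (it assumes the racer data is already sitting in a plain array on the device, which is exactly the part I don't know how to do; the parameter names are my own invention):

// rough sketch of the per-thread work I want; racers/numRacers are hypothetical names
__global__ void computeTimeStep(racer *racers, int numRacers)
{
    int j = blockIdx.x * blockDim.x + threadIdx.x;   // one racer per thread
    if (j < numRacers){
        racers[j].currentPosition++;
        if (racers[j].currentPosition >= racers[j].Distance)
            racers[j].active = false;
    }
}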
So, from my main function, I use the class this way:
Game game1;
game1.initialise();
while(true){
    game1.runTimeStep();
}
I'm trying to use CUDA for the commented part of the code. The idea is to copy the class object (or just the "racersVector" member) to the device, run "computeTimeStep" (a CUDA kernel that I want to implement) as many times as I want, and then, whenever I want to see the state of my vector, copy it back from the device to the host. So, ideally, it would look something like this:
Game game1;
game1.initialise();
here-the-code-to-copy-game1.racersVector-to-device
computeTimeStep <<<N,1>>> ();
computeTimeStep <<<N,1>>> ();
computeTimeStep <<<N,1>>> ();
computeTimeStep <<<N,1>>> ();
computeTimeStep <<<N,1>>> ();
copyBackToHost (game1.racersVector);
game1.printInfo();
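(I assume the real launches would also need to receive a pointer to the device copy and the element count as kernel arguments; my guess is something like this, where d_racers is a hypothetical device pointer produced by the copy step:)

// my guess: the kernel probably has to be told where the device data is
int N = game1.racersVector.size();        // one block per racer, as in the sketch above
computeTimeStep <<<N,1>>> (d_racers, N);  // d_racers: hypothetical device pointer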
So I modified my main program:
int main()
{
    Game game1;
    game1.initialise();

    // trying to copy game1.racersVector to device
    vector<racer> *d_vec;
    cudaMalloc((void **)&d_vec, sizeof(game1.racersVector));
    cudaMemcpy(d_vec, &game1.racersVector, sizeof(game1.racersVector), cudaMemcpyHostToDevice);
If I understood it correctly, this should have copied "game1.racersVector" to the device.
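(Although now I wonder whether sizeof(game1.racersVector) only copies the vector object itself, not the racer elements it owns. Maybe I should be copying the underlying array instead? My guess would be something like this, with d_racers again being a name I made up:)

// guess: allocate and copy the raw racer data rather than the std::vector object
racer *d_racers;
size_t numBytes = game1.racersVector.size() * sizeof(racer);
cudaMalloc((void **)&d_racers, numBytes);
cudaMemcpy(d_racers, game1.racersVector.data(), numBytes, cudaMemcpyHostToDevice);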
The idea is to create a CUDA function (kernel) that does one timestep on the "racersVector" vector. But when I try to create a CUDA kernel that takes a vector pointer as a parameter:
__global__ void computeTimeStep (vector<racer> *cud){
    cud->resize(4);
}
nvcc says:
cudaex2.cu(46): error: calling a __host__ function("std::vector<racer, std::allocator<racer> > ::resize") from a __global__ function("computeStep") is not allowed
How can I copy "racersVector" to the device and then work with that vector using a CUDA kernel?