I am trying to klearn how to program for nVidia cards. Here is my code:
__global__ void add_one(int* i)
{
i[0]++;
}
template<class TYPE>
void gpu_load(TYPE data)
{
int size = 1;
cudaMalloc( (void**) &data, size * sizeof(TYPE));
}
template<class TYPE>
void copy_to_gpu(TYPE* cpu_var, TYPE* gpu_var)
{
int size = 1;
cudaMemcpy( gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyHostToDevice);
}
template<class TYPE>
void copy_to_cpu(TYPE* cpu_var, TYPE* gpu_var)
{
int size = 1;
cudaMemcpy( gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyDeviceToHost);
}
int main()
{
int gpu_i[1];
int cpu_i[1];
cpu_i[0] = 5;
gpu_load(cpu_i);
copy_to_gpu(cpu_i, gpu_i);
add_one<<<1, 1>>>(gpu_i);
int res[1];
copy_to_cpu(res, gpu_i);
std::cout << res[0];
}
Why the cout doesn't display 5+1 and displays 0 instead?
I tried my best to make it work... It seems like nothing happens...?