1

I am trying to learn how to program for NVIDIA cards. Here is my code:

// Kernel: adds one to the first int of the buffer `i`, in place.
// `i` is dereferenced on the device. The increment is a plain (non-atomic)
// read-modify-write, so the result is only well-defined when a single thread
// executes it, as with the <<<1, 1>>> launch used in this snippet.
__global__ void add_one(int* i)
{
    *i += 1;
}

// Calls cudaMalloc for one element of sizeof(TYPE) bytes and stores the
// resulting device address into `data`.
//
// NOTE(review): `data` is a by-value parameter, so cudaMalloc writes the
// device address into this function's local copy only. The copy is discarded
// on return: the caller's variable is left unchanged and the allocation can
// no longer be reached (or freed). The status returned by cudaMalloc is
// ignored as well.
template<class TYPE>
void gpu_load(TYPE data)
{
    // Number of elements to allocate; fixed at one.
    int size = 1;
    // &data is the address of the local parameter, not of the caller's variable.
    cudaMalloc( (void**) &data, size * sizeof(TYPE));
}

// Copies a single TYPE element from host memory to device memory.
//
// cpu_var: source pointer (host side).
// gpu_var: destination pointer (device side).
// The status returned by cudaMemcpy is discarded.
template<class TYPE>
void copy_to_gpu(TYPE* cpu_var, TYPE* gpu_var)
{
    const int element_count = 1;  // exactly one element is transferred
    const cudaMemcpyKind direction = cudaMemcpyHostToDevice;

    // cudaMemcpy takes (destination, source, byte count, direction).
    cudaMemcpy(gpu_var, cpu_var, element_count * sizeof(TYPE), direction);
}

// Intended to copy a single TYPE element from device memory (gpu_var) back
// to host memory (cpu_var).
//
// NOTE(review): cudaMemcpy's first argument is the destination. This call
// passes gpu_var first and cpu_var second while requesting
// cudaMemcpyDeviceToHost, so the pointers are in the opposite order to the
// requested direction and cpu_var is not used as the destination. The status
// returned by cudaMemcpy is ignored.
template<class TYPE>
void copy_to_cpu(TYPE* cpu_var, TYPE* gpu_var)
{
    // Number of elements to copy; fixed at one.
    int size = 1;
    // Argument order here is (gpu_var, cpu_var), i.e. destination = gpu_var.
    cudaMemcpy( gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyDeviceToHost);
}

// Host entry point: tries to send the value 5 to the GPU, increment it there
// with add_one, copy it back into `res`, and print it.
int main() 
{
    // Both of these are ordinary host arrays on the stack; gpu_i is not a
    // device allocation.
    int gpu_i[1];
    int cpu_i[1];

    cpu_i[0] = 5;

    // cpu_i decays to int*, so gpu_load is instantiated with TYPE = int* and
    // receives that pointer by value; gpu_i is not involved in this call.
    gpu_load(cpu_i);
    // gpu_i (a host array) is passed in the device-pointer position.
    copy_to_gpu(cpu_i, gpu_i);

    // Launches one block of one thread; the kernel dereferences the pointer
    // it is given on the device. No error status is queried after the launch.
    add_one<<<1, 1>>>(gpu_i);

    // Not initialized before the copy below.
    int res[1];

    copy_to_cpu(res, gpu_i);

    // Prints whatever res[0] holds after the copy attempt, without a newline.
    std::cout << res[0];
}

Why doesn't the cout display 6 (5 + 1)? It displays 0 instead.

I tried my best to make it work... It seems like nothing happens...?

SkyRipper
  • 155
  • 5
  • 15

2 Answers

0

The first parameter to cudaMemcpy is always the destination, but in copy_to_cpu you pass gpu_var first.

Alan Stokes
  • 18,815
  • 3
  • 45
  • 64
0
  • You're passing cpu_i to your cudaMalloc routine. This is not what you want.
  • The gpu_i pointer needs to be something that is modifiable by your cudaMalloc routine, so we need to pass the address of it to that routine, as a pointer.
  • You had your parameters reversed on the copy_to_cpu routine.

If the following code doesn't work for you, add proper cuda error checking. It's possible there is a problem with your system config as well:

#include <cstdlib>
#include <iostream>

// Kernel: adds one to the first int of the buffer `i`, in place.
// `i` is dereferenced on the device. The increment is a plain (non-atomic)
// read-modify-write, so the result is only well-defined when a single thread
// executes it, as with the <<<1, 1>>> launch used in main.
__global__ void add_one(int* i)
{
    *i += 1;
}

// Allocates device memory for a single TYPE element and stores the device
// address in `data`.
//
// data: pointer taken by reference, so the address written by cudaMalloc is
//       visible to the caller. The caller is responsible for releasing it
//       with cudaFree.
//
// On failure the CUDA error string is printed to std::cerr and the program
// exits with EXIT_FAILURE, instead of silently continuing with an invalid
// pointer.
template<class TYPE>
void gpu_load(TYPE* &data)
{
    // Number of elements to allocate; fixed at one.
    int size = 1;
    const cudaError_t status = cudaMalloc( (void**) &data, size * sizeof(TYPE));
    if (status != cudaSuccess)
    {
        std::cerr << "gpu_load: cudaMalloc failed: "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Copies a single TYPE element from host memory to device memory.
//
// cpu_var: source pointer (host side).
// gpu_var: destination pointer (device side).
//
// On failure the CUDA error string is printed to std::cerr and the program
// exits with EXIT_FAILURE, so a failed transfer cannot go unnoticed.
template<class TYPE>
void copy_to_gpu(TYPE* cpu_var, TYPE* gpu_var)
{
    // Number of elements to copy; fixed at one.
    int size = 1;
    // cudaMemcpy takes (destination, source, byte count, direction).
    const cudaError_t status =
        cudaMemcpy(gpu_var, cpu_var, size * sizeof(TYPE), cudaMemcpyHostToDevice);
    if (status != cudaSuccess)
    {
        std::cerr << "copy_to_gpu: cudaMemcpy failed: "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Copies a single TYPE element from device memory back to host memory.
//
// cpu_var: destination pointer (host side).
// gpu_var: source pointer (device side).
//
// On failure the CUDA error string is printed to std::cerr and the program
// exits with EXIT_FAILURE. Checking this status also reports errors from
// earlier asynchronous work, such as a kernel that faulted.
template<class TYPE>
void copy_to_cpu(TYPE* cpu_var, TYPE* gpu_var)
{
    // Number of elements to copy; fixed at one.
    int size = 1;
    // cudaMemcpy takes (destination, source, byte count, direction).
    const cudaError_t status =
        cudaMemcpy(cpu_var, gpu_var, size * sizeof(TYPE), cudaMemcpyDeviceToHost);
    if (status != cudaSuccess)
    {
        std::cerr << "copy_to_cpu: cudaMemcpy failed: "
                  << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Host entry point: sends the value 5 to the GPU, increments it there with
// add_one, copies the result back and prints it.
//
// Returns 0 on success and 1 if the kernel launch, kernel execution or the
// final cudaFree reports an error.
int main()
{
    int *gpu_i;      // device pointer, filled in by gpu_load
    int cpu_i[1];    // host-side input

    cpu_i[0] = 5;

    gpu_load(gpu_i);
    copy_to_gpu(cpu_i, gpu_i);

    add_one<<<1, 1>>>(gpu_i);

    // A kernel launch returns no status itself: cudaGetLastError reports
    // launch/configuration errors, and cudaDeviceSynchronize waits for the
    // kernel and reports errors raised while it was running.
    cudaError_t status = cudaGetLastError();
    if (status == cudaSuccess)
    {
        status = cudaDeviceSynchronize();
    }
    if (status != cudaSuccess)
    {
        std::cerr << "add_one kernel failed: "
                  << cudaGetErrorString(status) << std::endl;
        cudaFree(gpu_i);
        return 1;
    }

    // Zero-initialized so the printed value is defined even if the copy
    // back were to fail without being noticed.
    int res[1] = { 0 };

    copy_to_cpu(res, gpu_i);

    std::cout << res[0];

    // Release the device allocation made by gpu_load.
    status = cudaFree(gpu_i);
    if (status != cudaSuccess)
    {
        std::cerr << "cudaFree failed: "
                  << cudaGetErrorString(status) << std::endl;
        return 1;
    }

    return 0;
}
Community
  • 1
  • 1
Robert Crovella
  • 143,785
  • 11
  • 213
  • 257