-3

When I use this code in CUDA, it only increments a[0], a[1] and a[2]; the other elements stay 0 (they are not incremented).

   // Kernel: each thread atomically adds 1 to a[threadIdx.x % 10].
   // Only elements a[0]..a[9] are touched, so `a` must hold at least 10 ints.
   __global__ void inc2(int * a){
        int i= threadIdx.x;
        i%=10;  // fold the thread index into the range 0..9
        // Threads whose indices are equal modulo 10 target the same element,
        // hence the atomic read-modify-write.
        atomicAdd(&(a[i]),1);
    }

when I write

// Kernel variant: every thread atomically adds 1 to a[6], whatever its index.
__global__ void inc2(int * a){
    int i= threadIdx.x;
    i%=10;  // NOTE(review): i is computed but never used in this variant
    atomicAdd(&(a[6]),1);  // all threads increment the same element
}

it doesn't increment a[6].

What's wrong? Sorry.

Here is the complete code:

// Kernel: every thread atomically adds 1 to a[6], whatever its index.
__global__ void inc2(int * a){
    int i= threadIdx.x;
    i%=10;  // NOTE(review): i is computed but never used below
    atomicAdd(&(a[6]),1);  // all threads increment the same element
}
// Host driver: zero-initialises a 10-int host array, allocates a device
// buffer, copies data to it, launches inc2 with 100 blocks of 100 threads,
// copies data back and prints all 10 host elements.
// NOTE(review): none of the CUDA runtime return codes are checked here.
int main()
{

    //=============================================
    int aaa[10]={0};
    int *q;
    // Size arguments to cudaMalloc/cudaMemcpy are byte counts, not element counts.
    cudaMalloc((void**)&q,100);
    // NOTE(review): copies only 10 bytes, not 10 ints; 10*sizeof(int) would cover the array.
    cudaMemcpy(q,aaa,10,cudaMemcpyHostToDevice);
    inc2<<<100,100>>>(q);
    // NOTE(review): again only 10 bytes come back, so most of aaa keeps its host-side zeros.
    cudaMemcpy(aaa,q,10,cudaMemcpyDeviceToHost);
    printf("\n\n");
    // Print the host copy of the 10 counters, tab-separated.
    for(int i=0;i<10;i++){
        printf("%d\t",aaa[i]);
    }
    cudaFree(q);
    return 0;
 }
Prof. Hell
  • 729
  • 12
  • 19

1 Answers1

3

First of all, you should use proper cuda error checking any time you are having trouble with a CUDA code.

You may be confused about the size parameters associated with functions like cudaMalloc or cudaMemcpy. They represent a size in bytes. So this:

cudaMemcpy(aaa,q,10,cudaMemcpyDeviceToHost);

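only transfers 10 bytes, which is 2.5 int quantities (assuming a 4-byte int). That is why a[0] and a[1] come back updated, and a[2] appears updated too: its low two bytes are copied, which is enough to show the new value on a little-endian host as long as the count fits in 16 bits. If you want to see the modified value of a[6], you're going to have to transfer more than the first 2.5 int quantities in a.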

If you modify these lines:

cudaMemcpy(q,aaa,40,cudaMemcpyHostToDevice);
                 ^^

and:

cudaMemcpy(aaa,q,40,cudaMemcpyDeviceToHost);
                 ^^

I think you'll have better results.

Community
  • 1
  • 1
Robert Crovella
  • 143,785
  • 11
  • 213
  • 257
  • `sizeof(int)*10` instead of `40` is generally cleaner: i.e. it is clearer what you have in mind and where the numbers come from. – CygnusX1 Aug 26 '14 at 08:11