I am running a CUDA vector addition program and getting zeros as the output when I later sum the result. I have tried debugging but cannot find the problem. It should add the numbers, but instead it simply prints zeros, and I do not understand why.
I have tried everything I can think of with the code, and I am still not getting the expected output.
using namespace std;
// Element-wise vector addition on the device: C[i] = A[i] + B[i] for i in [0, n).
//
// Parameters:
//   A, B - input vectors; each must be readable from device code for n elements.
//   C    - output vector; must be writable from device code for n elements.
//   n    - number of elements to process; n <= 0 performs no work.
//
// Launch layout: only the x components of the block/grid indices are used, so
// a 1D launch is expected. Each thread starts at its global index and advances
// by the total number of launched threads, so every element is covered for any
// grid size, including a grid too small to give each element its own thread.
// No shared memory is used.
__global__ void vecADDKernal(double *A, double *B, double *C, int n){
    // 64-bit index math: blockIdx.x * blockDim.x is otherwise evaluated in
    // 32-bit unsigned arithmetic and could wrap for very large launches.
    const long long stride = (long long)gridDim.x * blockDim.x;
    for (long long i = (long long)blockIdx.x * blockDim.x + threadIdx.x; i < n; i += stride) {
        C[i] = A[i] + B[i];
    }
}
// Reports a failed CUDA runtime call on stderr and terminates the program.
// `what` names the step being checked so the message identifies the failing call.
static void checkCuda(cudaError_t status, const char *what){
    if (status != cudaSuccess) {
        std::cerr << "CUDA error during " << what << ": "
                  << cudaGetErrorString(status) << std::endl;
        exit(EXIT_FAILURE);
    }
}

// Adds two vectors of n doubles on the GPU (A[i] = 2i, B[i] = 3i), prints every
// element of the result on its own line, then prints "HI " followed by the sum
// of all result elements.
//
// Returns 0 on success. Exits with a failure status, after printing a message
// to stderr, if a host allocation or any CUDA call fails.
int main( ){
    const int n = 1048576;
    // Byte count kept in size_t so it cannot overflow int if n is raised.
    const size_t size = (size_t)n * sizeof(double);

    // Host buffers.
    double *h_A = (double*)malloc(size);
    double *h_B = (double*)malloc(size);
    double *h_C = (double*)malloc(size);
    if (!h_A || !h_B || !h_C) {
        std::cerr << "Host allocation of " << size << " bytes failed" << std::endl;
        free(h_A);
        free(h_B);
        free(h_C);
        return EXIT_FAILURE;
    }

    // Device buffers.
    double *d_A = 0, *d_B = 0, *d_C = 0;
    checkCuda(cudaMalloc(&d_A, size), "cudaMalloc of d_A");
    checkCuda(cudaMalloc(&d_B, size), "cudaMalloc of d_B");
    checkCuda(cudaMalloc(&d_C, size), "cudaMalloc of d_C");

    // Initialize vectors on host
    for (int i = 0; i < n; i++) {
        h_A[i] = 2.0 * i;
        h_B[i] = 3.0 * i;
    }

    checkCuda(cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice), "copy of A to device");
    checkCuda(cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice), "copy of B to device");

    const int blockSize = 256;
    // Number of thread blocks in grid: integer ceiling division.
    // ceil(n/blockSize) would truncate first and drop a partial last block.
    const int gridSize = (n + blockSize - 1) / blockSize;
    vecADDKernal<<<gridSize, blockSize>>>(d_A, d_B, d_C, n);
    // A launch returns no status itself: query the launch error explicitly,
    // then synchronize so errors raised while the kernel runs are reported
    // here instead of leaving the output buffer silently unwritten.
    checkCuda(cudaGetLastError(), "kernel launch");
    checkCuda(cudaDeviceSynchronize(), "kernel execution");

    checkCuda(cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost), "copy of C to host");

    double sum = 0;
    for (int a = 0; a < n; a++) {
        sum += h_C[a];  // accumulate; plain assignment kept only the last element
        std::cout << h_C[a] << '\n';  // '\n' avoids flushing on every element
    }
    std::cout << "HI " << sum << std::endl;

    checkCuda(cudaFree(d_A), "cudaFree of d_A");
    checkCuda(cudaFree(d_B), "cudaFree of d_B");
    checkCuda(cudaFree(d_C), "cudaFree of d_C");
    free(h_A);
    free(h_B);
    free(h_C);
    return 0;
}