I am making one program in CUDA C, I've solved the problem on classic way, but I should to parallelise the code using CUDA. The problem is: print all vectors of length n, in which each element can have a value of [0 ... K] and for which the sum of all elements is SUM.
I've wrote the program in CUDA C, and the program should return to me the numbers of vectors who satisfied the condition. Now the problem is that I can't to find any error in the code, I don't know how to debug in Ubuntu, and the output always give me 0. I think that the global function doesn't execute. This is the code, I hope that someone will help me:
The code of the program is:
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <cuda.h>
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#define MYASSERT(condition) if(!(condition)) { return; }
__device__ void distribute2 (int vec[], int n, int k, int sum)
{
int i;
for (i = blockIdx.x * blockDim.x + threadIdx.x;; i < n; i+=blockDim.x * gridDim.x)
{
vec[i]=MIN(sum, k);
sum = sum- vec[i];
}
MYASSERT (sum == 0);
}
__global__ void moveUp (int vec[], int n, int k, int *res)
{
int i;
int collected = 0;
for(i=blockIdx.x * blockDim.x + threadIdx.x; i<n;i+=blockDim.x * gridDim.x)
{
if (collected == 0)
collected = vec[i];
else
{
if (vec[i] < k)
{
vec[i] =vec[i]+1;
distribute2 (vec, i, k, collected-1);
__synchthreads();
res[0]=res[0]+1;
}
else
{
collected += k;
}
}
}
MYASSERT(collected != 0);
}
int main()
{
int n=5;
int vec[n];
int k=5;
int sum=10;
int *res_h, *res_d;
size_t size = 1 * sizeof(int);
res_h = (int *)malloc(size);
cudaMalloc((void **) &res_d, size);
res_h[0] = 0;
cudaMemcpy(res_d, res_h, size, cudaMemcpyHostToDevice);
cudaDeviceProp devProp;
cudaGetDeviceProperties(&devProp, 0);
unsigned maxbytes = devProp.totalGlobalMem / 3;
unsigned max_samples = maxbytes / sizeof(int);
if (n > max_samples) n = max_samples;
printf("Using %d samples to estimate pi\n", n);
moveUp<<<256, 256>>>(vec, n, k, res_d);
cudaMemcpy(res_h, res_d, size, cudaMemcpyDeviceToHost);
printf("%d\n", res_h[0]);
return 0;
}