I am working on HD image processing using CUDA 7.5 with an NVIDIA GeForce 840M on Ubuntu 14.04. I have a 3750*3750 image, and I am having trouble initializing an array of this size. The following code only works until k reaches about 4000.
// Number of elements in the working buffers; the EDGE kernel sets it to dim*dim.
__device__ int sImg;
// Scratch buffer in device memory. The EDGE kernel allocates it with a
// device-side cudaMalloc, passes it to its child kernels, and frees it
// before returning.
__device__ int *B;
/* ############################### INITIALIZE ############################## */
// Fills the first `s` elements of `tab` with the value `v`.
//
// tab : device pointer to at least `s` ints.
// v   : value written to every element.
// s   : number of elements to fill; nothing is written when s <= 0.
//
// Launch layout: 1D grid of 1D blocks. Each thread starts at its flat global
// index and advances by the total number of launched threads, so the whole
// range is covered for any grid/block size (a ceil-div launch simply makes
// every thread write at most one element, as before).
__global__ void initialize(int *tab, int v, int s)
{
    // Do the index arithmetic in size_t so blockIdx.x * blockDim.x cannot
    // wrap for very large grids.
    const size_t count  = (s > 0) ? (size_t)s : 0;
    const size_t stride = (size_t)gridDim.x * blockDim.x;

    for (size_t k = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
         k < count;
         k += stride)
    {
        tab[k] = v;
    }
}
/* ########################### The parent kernel ########################### */
// Parent kernel: allocates the scratch buffer B, zero-fills B and C, runs the
// FSDWT transform from A into B, thresholds B into C, then releases B.
//
// A        : input buffer, forwarded to FSDWT.
// C        : output buffer of dim*dim ints; zero-filled, then written by Treshold.
// h, w     : forwarded unchanged to FSDWT.
// dim      : side length; the working buffers hold dim*dim ints.
// nbScales : number of threads used for the FSDWT launch; also forwarded to it.
//
// Every thread that runs this kernel performs the whole sequence and writes
// the globals sImg and B, so it is meant to be launched as <<<1, 1>>>.
// Requires dynamic parallelism (kernel launches and cudaMalloc from device code).
//
// NOTE: a device-side cudaMalloc is served from the device malloc heap, whose
// default size (8 MB) is far smaller than a 3750 x 3750 int buffer (~56 MB).
// The host must raise cudaLimitMallocHeapSize before launching this kernel;
// otherwise the allocation fails and the kernel returns without touching C.
__global__ void EDGE(int *A, int *C ,int h, int w, int dim, int nbScales)
{
    sImg = dim*dim;

    // A non-positive element count would yield an empty (invalid) child grid.
    if (sImg <= 0)
    {
        return;
    }

    // Compute the byte count in size_t so sImg * sizeof(int) cannot wrap.
    const size_t scratchBytes = (size_t)sImg * sizeof(int);

    B = NULL;
    const cudaError_t allocStatus = cudaMalloc((void**)&B, scratchBytes);
    if (allocStatus != cudaSuccess || B == NULL)
    {
        // Allocation failed (typically: device heap too small). Launching the
        // child kernels on an invalid pointer would corrupt memory, so stop.
        B = NULL;
        return;
    }

    const int threadsPerBlock = 256;
    const int blocksPerGrid = (sImg + threadsPerBlock - 1) / threadsPerBlock;

    /// Zero the scratch buffer and the output buffer
    initialize<<<blocksPerGrid,threadsPerBlock>>>(B,0,sImg);
    cudaDeviceSynchronize();
    initialize<<<blocksPerGrid,threadsPerBlock>>>(C,0,sImg);
    cudaDeviceSynchronize();

    /// A transformation into the frequency domain
    FSDWT <<< 1 , nbScales >>> (A,B, h, w,dim,nbScales);
    cudaDeviceSynchronize();

    /// Thresholding the transform
    Treshold<<<1,1>>>(B,C,dim*dim);
    cudaDeviceSynchronize();

    cudaFree(B);
    // Do not leave a dangling pointer in the global after the buffer is gone.
    B = NULL;
}
/* ############################ call from host ############################ */
// Host entry point: launches the EDGE parent kernel with a single thread.
//
// A        : device pointer to the input image, forwarded to EDGE.
// B        : device pointer to the output buffer; it is passed as EDGE's `C`
//            argument (this parameter shadows the file-level device global B).
// h, w     : forwarded unchanged to EDGE.
// dim      : side length; EDGE works on dim*dim ints.
// nbScales : forwarded unchanged to EDGE.
//
// The launch is asynchronous; this function does not synchronize or report
// errors (it returns void), so callers should check cudaGetLastError() /
// synchronize themselves.
extern "C" void EDGE_host(int *A,int *B,int h,int w,int dim, int nbScales)
{
    // EDGE allocates a dim*dim int scratch buffer with a device-side
    // cudaMalloc, which is served from the device malloc heap. That heap is
    // only 8 MB by default, so it must be enlarged for large images before
    // the kernel is launched.
    if (dim > 0)
    {
        const size_t scratchBytes = (size_t)dim * (size_t)dim * sizeof(int);
        // Keep the default heap size on top as slack for allocator overhead.
        const size_t wantedHeap = scratchBytes + ((size_t)8 << 20);

        size_t currentHeap = 0;
        if (cudaDeviceGetLimit(&currentHeap, cudaLimitMallocHeapSize) == cudaSuccess
            && currentHeap < wantedHeap)
        {
            // Best effort: the limit can only be changed before any kernel
            // using the device heap has run. There is no error channel here,
            // so the status is not acted on.
            (void)cudaDeviceSetLimit(cudaLimitMallocHeapSize, wantedHeap);
        }
    }

    EDGE <<< 1 , 1 >>> (A,B, h, w,dim,nbScales);
}
Thank you very much