I have been trying to put in place a CUDA code (taken in part from Fractal Image Compression by Yuval Fisher) that has a double pointer to a 2D image. After taking care of the pointer to pointer allocation in this , I am still getting segmentation fault error along with "Warning: Cannot tell what pointer points to, assuming global memory space" warning. Here is the entire code. I am also posting it here as under: (My apologies for duplicating the posted code)
#include <cuda.h>
#include <stdio.h>
#include <stdlib.h>
#define hsize 256
#define vsize 256
#define IMAGE_TYPE unsigned char
__global__ void kernel(IMAGE_TYPE matrixin[][hsize], IMAGE_TYPE matrixout[][hsize]) {
int tid=threadIdx.x;
int bid=blockIdx.x;
matrixout[bid][tid]=matrixin[bid][tid];
}
int fatal(char* s) {
fprintf(stderr,"%s\n",s);
return 1;
}
#define matrix_allocate(matrix,hsize,vsize,TYPE) {\
TYPE *imptr;\
int _i;\
matrix=(TYPE**)malloc((vsize)*sizeof(TYPE*));\
imptr=(TYPE*)malloc((long)(hsize)*(long)(vsize)*sizeof(TYPE));\
if(imptr==NULL)\
fatal("\nNo memory in matrix allocate.");\
for(_i=0;_i<vsize;++_i,imptr+=hsize)\
matrix[_i] = imptr;\
}\
int main() {
typedef IMAGE_TYPE IMarray[vsize][hsize];
IMAGE_TYPE **hin_image,**hout_image;
IMarray *din_image,*dout_image;
//allocate host memory
matrix_allocate(hin_image,hsize,vsize,IMAGE_TYPE)
for(int i=0;i<vsize;i++)
for(int j=0;j<hsize;j++)
hin_image[i][j]='a';
matrix_allocate(hout_image,hsize,vsize,IMAGE_TYPE)
//allocate device memory
cudaMalloc((void**)&din_image,(vsize*hsize)*sizeof(IMAGE_TYPE));
cudaMalloc((void**)&dout_image,(vsize*hsize)*sizeof(IMAGE_TYPE));
cudaMemcpy(din_image,hin_image, (vsize*hsize)*sizeof(IMAGE_TYPE),cudaMemcpyHostToDevice);
dim3 threads(hsize,1,1);
dim3 blocks(vsize,1,1);
kernel<<<blocks,threads>>>(din_image,dout_image);
cudaMemcpy(hout_image,dout_image,(vsize*hsize)*sizeof(IMAGE_TYPE),cudaMemcpyDeviceToHost);
for(int i=0;i<10;i++) {
printf("\n");
for(int j=0;j<10;j++)
printf("%c\t",hout_image[i][j]);
}
printf("\n");
cudaFree(din_image);
cudaFree(dout_image);
free(hin_image);
free(hout_image);
return 0;
}
I intend to know what is wrong with the standard 2D access of image inside the kernel function. Any help would be highly welcome.