I'm new to CUDA and I'm trying to do my first project in it. I'm trying to push the image data to GPU, make it black and white there and write it in new image. But program gives me a black image instead of black and white version. What am I doing wrong? Width and height of image are 3840x2160.
__global__ void addMatrix(unsigned char *DataOut, unsigned char *DataIn)
{
int idx = threadIdx.x;
DataOut[idx] = (DataIn[idx] + DataIn[idx + 1] + DataIn[idx + 2]) / 3;
//
}
int main()
{
int iWidth, iHeight, iBpp, iHeightOut, iWidthOut;
vector<unsigned char> pDataIn;
vector<unsigned char> pDataOut;
int error1 = LoadBmpFile(L"3840x2160.bmp", iWidth, iHeight, iBpp, pDataIn);
if (error1 != 0 || pDataIn.size() == 0 || iBpp != 32)
{
std::cout << "erroror load input file!\n";
}
pDataOut.resize(pDataIn.size()/4);
unsigned int SizeIn, SizeOut;
unsigned char *devDatOut, *devDatIn, *PInData, *POutData;
int i = 0;
SizeIn = pDataIn.size();
SizeOut = pDataOut.size();
PInData = pDataIn.data();
POutData = pDataOut.data();
i = cudaMalloc((void**)&devDatIn, SizeIn * sizeof(unsigned char));
if(i != 0)
{
printf("cudaMalloc __e FAIL! Code: %d\n", i);
_getch();
}
i = cudaMalloc((void**)&devDatOut, SizeOut * sizeof(unsigned char));
if(i != cudaSuccess)
printf("cudaMalloc __e FAIL! Code: %d\n", i);
i = cudaMemcpy(devDatIn, PInData, SizeIn * sizeof(unsigned char), cudaMemcpyHostToDevice);
if(i != cudaSuccess)
printf(" cudaMemcpy __e FAIL! Code: %d\n", i);
i = cudaMemcpy(devDatOut, POutData, SizeOut * sizeof(unsigned char), cudaMemcpyHostToDevice);
if(i != cudaSuccess)
printf(" cudaMemcpy __e FAIL! Code: %d\n", i);
dim3 gridSize = dim3(1, 1, 1);
dim3 blockSize = dim3(SizeIn, 1, 1);
addMatrix<<<gridSize, blockSize>>>(devDatIn, devDatOut);
if ( i == cudaGetLastError() )
{
printf( "Error! %d\n", cudaGetLastError() );
_getch;
}
cudaEvent_t syncEvent;
cudaEventCreate(&syncEvent);
cudaEventRecord(syncEvent, 0);
cudaEventSynchronize(syncEvent);
cudaMemcpy(POutData, devDatOut, SizeOut * sizeof(unsigned char), cudaMemcpyDeviceToHost);
cudaEventDestroy(syncEvent);
i = WriteBmpFile(L"3840x2160_test2.bmp", iWidth, iHeight, 8, pDataOut.size(), pDataOut.data(), false);
if(i != 0)
printf(" cudaMemcpy __e FAIL! Code: %d\n", i);
cudaFree(devDatOut);
cudaFree(devDatIn);
}
EDIT 1: