I'm trying to convert a 3x3 rgb "image" to grayscale. The image is just a 3d array. You can think of it being a 2d image, with each pixel having a 1x3 array of rgb values.
To do this, I've basically converted the 3d array into a 1D array. There is no issue with the conversion. I basically send this vector array to the cuda device and it returns me a 1x9 (one grayscale value for each pixel) processed vector array. I've tried to do this by first creating 3 blocks, each for RGB. Each block has 3x3 threads where each thread targets either the red, blue, or green value (determined by the block index) of a pixel. I then multiply each red, blue, or green value by a corresponding factor and add that value to the corresponding pixel of the output array.
However, what I've been getting is an array that is comprised of all zeroes.
#include <stdio.h>
//
__global__ void RGBToGrayScale(float *d_out, float *d_in, int rowCount, int colCount) {
float grayScaleAddition;
int temp = d_in[blockIdx.x * rowCount * colCount + threadIdx.x * colCount + threadIdx.y];
switch(blockIdx.x) {
case 0:
grayScaleAddition = 0.299 * temp;
break;
case 1:
grayScaleAddition = 0.587 * temp;
break;
case 2:
grayScaleAddition = 0.114 * temp;
break;
}
d_out[threadIdx.x * colCount + threadIdx.y] += grayScale;
}
int main() {
int image[3][3][3] = { //3 rows, columns, and 3 rgb values for each pixel
{{1, 3, 2}, {4, 5, 6}, {7, 8, 9}},
{{10, 11, 12}, {13, 14, 15}, {16, 17, 18}},
{{19, 20, 21}, {22, 23, 24}, {25, 26, 27}}
};
const int IMAGE_ROW_COUNT = 3;
const int IMAGE_COLUMN_COUNT = 3;
const int ARRAY_BYTES = IMAGE_ROW_COUNT * IMAGE_COLUMN_COUNT * 3 * sizeof(float);
//converting image to a 1D array
float* h_in = (float*)malloc(ARRAY_BYTES);
float* h_out = (float*)malloc(ARRAY_BYTES / 3);
for (int i = 0; i < IMAGE_ROW_COUNT; ++i) {//no issue with conversion, checked manually
for (int j = 0; j < IMAGE_COLUMN_COUNT; ++j) {
for (int k = 0; k < 3; ++k) {
h_in[k * IMAGE_ROW_COUNT * IMAGE_COLUMN_COUNT + i * IMAGE_COLUMN_COUNT + j] = float(image[i][j][k]);
}
}
}
//declare GPU memory pointers
float* d_in;
float* d_out;
cudaMalloc((void **) &d_in, ARRAY_BYTES);
cudaMalloc((void **) &d_out, ARRAY_BYTES / 3);
//transfer array into GPU
cudaMemcpy(d_in, h_in, ARRAY_BYTES, cudaMemcpyHostToDevice);
RGBToGrayScale<<<3, dim3(IMAGE_ROW_COUNT, IMAGE_COLUMN_COUNT)>>>(d_out, d_in, IMAGE_ROW_COUNT, IMAGE_COLUMN_COUNT);
//copy back the result array to CPU
cudaMemcpy(h_out, d_out, ARRAY_BYTES, cudaMemcpyDeviceToHost);
for (int i = 0; i < IMAGE_ROW_COUNT * IMAGE_COLUMN_COUNT; ++i) {
printf("%f", h_out[i]);
printf(((i % 3) != 2) ? "\t" : "\n");
}
cudaFree(d_in);
cudaFree(d_out);
free(h_in);
free(h_out);
return 0;
}