I have the following code snippet:
// main.cu
#define thread 16
dim3 blocks( ( width + thread - 1 ) / thread, ( height + thread - 1 ) / thread );
dim3 threads( thread, thread );
kernel<<<blocks, threads>>>( dev_data, width, height );
// kernel function
__global__ void kernel( uchar *data, int width, int height ) {
// map from threadIdx/blockIdx to pixel position
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
int offset = x + y * blockDim.x * gridDim.x;
data[offset * 3 + 0] = 255;
data[offset * 3 + 1] = 0;
data[offset * 3 + 2] = 0;
}
When i execute it only near half of the pixels getting blue. What am i doing wrong? I think its about indexing.