I am trying to write my own implementation of reductive search for maximum number. This is my first CUDA C program and I ran into some errors that I cannot understand yet.
my code:
__global__
void reduce (box * d_in_data, int insize, box * d_out_data)
{
/*
for example:
array size = 25200 (size variable, keep in d_in_data)
gridDim = 252
blockDim = 10
*/
uint32_t tid = threadIdx.x;
//__shared__ box sdata[insize / gridDim.x]; // 25200 / 252 = 100
extern __shared__ box sdata[]; // 25200 / 252 = 100
/*spt its 'size per thread' for filling sdata*/
/* (25200 / 252) / 10 */ /* each thread should be copy 10 elements from d_in_data to sdata */
uint32_t spt = (insize / gridDim.x) / blockDim.x;
box (* sh_ptr)[blockDim.x][spt] = (box (*) [blockDim.x][spt]) &sdata;
for (int i = 0;i < spt;i++){
(*sh_ptr)[tid][i] = d_in_data[blockIdx.x * (insize / gridDim.x) + (tid * blockDim.x + i)];
}
__syncthreads();
box temp = (*sh_ptr)[tid][0];
for (int i = 0;i < spt;i++){
box i_box = (*sh_ptr)[tid][i];
temp = temp.s < i_box.s ? i_box : temp;
}
(*sh_ptr)[tid][0] = temp;
__syncthreads();
box (* d_out_ptr)[gridDim.x][blockDim.x] = (box (*)[gridDim.x][blockDim.x]) d_out_data;
(*d_out_ptr)[blockIdx.x][tid] = (*sh_ptr)[tid][0];
}
__shared__ box sdata[insize / gridDim.x];
- why "here cannot be used as a constant" ? how to set the size explicitly?box (* sh_ptr)[blockDim.x][spt] = (box (*) [blockDim.x][spt]) &sdata;
- "error: expression must have a constant value" , "note: attempt to access run-time storage", "note: the value of variable "spt"". How to write correctly in my case?uint32_t spt = (insize / gridDim.x) / blockDim.x;
- "here cannot be used as a constant"box (* d_out_ptr)[gridDim.x][blockDim.x] = (box (*)[gridDim.x][blockDim.x]) d_out_data;
- "error: expression must have a constant value" , "note: attempt to access run-time storage", It's the same here.
Please tell me where I made a mistake in each paragraph.