This is my kernel function and it does simple work:
&
every item with blockIdx.x
to filter even and odd:
__global__
void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
const int p_index = blockIdx.x * size + threadIdx.x;
if ((v_array[threadIdx.x] & 1) == blockIdx.x)
{
compact_array[p_index]= 1;
}
else
{
compact_array[p_index]= 0;
}
}
However, it produce random result every time I run the program, like
1 0 1625730008 32767 1625730024 32767 4197775 0 0 0 4197470 0 0 0 2525809656 32630 1 0 1625729712 32767
What confuse me is that the result is not 0
or 1
since my if
and else
should cover every situation.
Could someone help me out of this?
Total program:
#include <iostream>
void print_array(const unsigned int* v_array, int size)
{
for (int i = 0; i < size; ++i)
{
std::cout<<v_array[i]<<" ";
}
std::cout<<std::endl;
}
__global__
void g_compact(const unsigned int* v_array, unsigned int* compact_array, int size)
{
const int p_index = blockIdx.x * size + threadIdx.x;
if (true)
{
compact_array[p_index]= 1;
}
else
{
compact_array[p_index]= 0;
}
}
int main(int argc, char const *argv[])
{
unsigned int *d_in;
unsigned int *d_out;
cudaMalloc(&d_in, sizeof(unsigned int) * 10);
cudaMalloc(&d_out, sizeof(unsigned int) * 20);
unsigned int h_array[10] = {
1, 2, 3, 4,
5, 6, 7, 8,
9, 10
};
cudaMemcpy(d_in, h_array, sizeof(unsigned int) * 10, cudaMemcpyHostToDevice);
g_compact<<<2, 10>>>(h_array, d_out, 10);
unsigned int h_out[20];
cudaMemcpy(h_out, d_out, sizeof(unsigned int) * 20, cudaMemcpyDeviceToHost);
print_array(h_out, 20);
return 0;
}