i think an array can be allocated on gpu ex. __device__ int device_array[100];
without using cudaMalloc as the lenght is known. But when i run the following code some irrelevant numbers are displayed. I examined a popular book for cuda and all examples in there uses cudaMalloc. A fixed size array can be used like this or it must be allocated with cudaMalloc?
__device__ int device_array[100];
__global__ void kernel() {
device_array[blockIdx.x] = blockIdx.x;
}
void call_kernel( int *host_array ) {
kernel<<<100,1>>>();
cudaMemcpy( host_array, device_array, 100 * sizeof( int ), cudaMemcpyDeviceToHost );
}
int main() {
int host_array[100];
call_kernel( host_array );
for ( int i = 0; i < 100; i++ )
cout << host_array[i] << endl;
}