I am trying to use a managed variable in a CUDA program, and I get a segmentation fault when I set the managed variable on the host side. I am doing exactly what the documentation describes here (http://docs.nvidia.com/cuda/cuda-c-programming-guide/#managed-qualifier). Why is this happening?
#include <cstddef>
#include <cstdio>
#include <cstdlib>

#include <cuda.h>
// Number of threads per block used for the kernel launch in main().
#define THREADS_PER_BLOCK 32
// Passed by main() as the grid size (number of blocks) of the launch.
#define BLOCKS_PER_SM 1
// Converts a size in mebibytes to bytes. The argument is widened to size_t
// before shifting so that values of 2048 or more do not overflow a 32-bit int.
#define MB(x) (static_cast<size_t>(x) << 20)
// Managed global: written by the host in main() and read by test_kernel().
__device__ __managed__ int val = 0;
/*
 * Stores the managed global `val` (narrowed to char) into src[0].
 *
 * src: buffer written by the kernel; only its first byte is touched.
 *
 * Any grid/block shape is accepted. Only the thread with index (0,0,0) in
 * block (0,0,0) performs the store, so the launch does not issue one
 * unsynchronised write to the same byte per thread. No shared memory is used.
 */
__global__ void test_kernel(char *src)
{
    const bool first_block = (blockIdx.x | blockIdx.y | blockIdx.z) == 0;
    const bool first_thread = (threadIdx.x | threadIdx.y | threadIdx.z) == 0;

    if (first_block && first_thread) {
        // Explicit narrowing: val is an int, the destination is a single char.
        src[0] = static_cast<char>(val);
    }
}
/*
 * Prints a diagnostic and terminates the process if a CUDA runtime call
 * did not return cudaSuccess.
 *
 * status: return code of the call being checked.
 * what:   short label naming the call, used in the error message.
 */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Sets the managed variable `val` on the host, launches test_kernel on a
 * managed buffer, waits for it to finish and releases the buffer.
 *
 * Returns 0 on success; exits with EXIT_FAILURE if any CUDA call fails or
 * the current device does not report managed-memory support.
 */
int main(int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    char *data = NULL;
    int size = 2; // 2 MB
    const size_t bytes = MB(size);

    // Confirm there is a usable device that supports managed memory BEFORE the
    // host touches `val`. NOTE(review): presumably the reported segfault on the
    // host-side store comes from running where managed memory is unavailable;
    // this turns that situation into a readable error instead of a crash.
    int device = 0;
    check_cuda(cudaGetDevice(&device), "cudaGetDevice");

    int managed_supported = 0;
    check_cuda(cudaDeviceGetAttribute(&managed_supported, cudaDevAttrManagedMemory, device),
               "cudaDeviceGetAttribute(cudaDevAttrManagedMemory)");
    if (!managed_supported) {
        fprintf(stderr, "Device %d does not support managed memory\n", device);
        return EXIT_FAILURE;
    }

    // Host-side write to the managed variable; done before the launch so no
    // kernel is running while the host accesses it.
    val = 100;

    check_cuda(cudaMallocManaged(&data, bytes), "cudaMallocManaged");

    test_kernel<<<BLOCKS_PER_SM, THREADS_PER_BLOCK>>>(data);
    // A launch returns no status itself: configuration errors show up via
    // cudaGetLastError(), execution errors at the following synchronisation.
    check_cuda(cudaGetLastError(), "test_kernel launch");
    check_cuda(cudaDeviceSynchronize(), "cudaDeviceSynchronize");

    check_cuda(cudaFree(data), "cudaFree");
    return 0;
}