I want to declare a global scope array at runtime in device memory that is accessible by all of the device functions. Doing the following satisfies the compiler:
__device__ float *g_array;
However, I am now getting compiler warnings at cudaMemcpy
and cudaMalloc
that I am accessing a device variable from a host function. I assumed that I need to get the actual device address, so I tried cudaGetSymbolAddress
however I get Invalid device symbol error
.
This is basically what I want to do:
__device__ float *g_array;
__constant__ int N;
__global__ void device_add_one()
{
for (int index = blockIdx.x * blockDim.x + threadIdx.x;
index < N;
index += blockDim.x * gridDim.x)
{
g_array[index] += 1;
{
}
void init_arrays(float *host_array, int size)
{
cudaMalloc(&g_array, (size * sizeof(float));
cudaMemcpy(g_array, host_array, (size * sizeof(float)), cudaMemcpyHostToDevice);
cudaMemcpyToSymbol(N, &size, sizeof(N));
device_add_one<< < 1, 254>> >();
cudaMemcpy(host_array, g_array, (size * sizeof(float), cudaMemcpyDeviceToHost);
}
Note: It has to work with computability 2.0