2

I have a pointer which is dynamically allocated in device,then how can I copy it from device to host.

#include <stdio.h>

#define cudaSafeCall(call){   \
  cudaError err = call;       \
  if(cudaSuccess != err){     \
    fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__, cudaGetErrorString(err));   \
    exit(EXIT_FAILURE);       \
}}
#define cudaCheckErr(errorMessage) {    \
  cudaError_t err = cudaGetLastError(); \
  if(cudaSuccess != err){               \
    fprintf(stderr, "%s(%i) : %s : (code %d) %s.\n", __FILE__, __LINE__, errorMessage, err, cudaGetErrorString(err)); \
    exit(EXIT_FAILURE);                 \``
}}

struct num{  
int *a;
int b;
};

__device__ struct num *gun;
int main()
{
  int i;
  char c[100];
  struct num *dun,*cun; 
  cudaSafeCall(cudaSetDevice(1));
  cun=(struct num*)malloc(10*sizeof(struct num));  
  cudaSafeCall(cudaMalloc(&dun,10*sizeof(struct num)));
  cudaSafeCall(cudaMemcpyToSymbol(gun,&dun,sizeof(struct num*)));
  __global__ void kernel();
  kernel<<<1,10>>>();
  cudaSafeCall(cudaDeviceSynchronize());  
  cudaCheckErr(c);
  cudaSafeCall(cudaMemcpyFromSymbol(&dun,gun,sizeof(struct num*)));
  cudaSafeCall(cudaMemcpy(cun,dun,10*sizeof(struct num),cudaMemcpyDeviceToHost));
  for(i=0;i<10;i++) cudaSafeCall(cudaMalloc(&csu[i].a,10*sizeof(int)));  
  cudaSafeCall(cudaGetSymbolAddress((void**)csu[0].a,(void**)gun[0].a));  
  for(i=0;i<10;i++)   cun[i].a=(int*)malloc(10*sizeof(int));
  for(i=0;i<10;i++)   cudaSafeCall(cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost));
  printf("%d ",cun[8].b);
  printf("%d ",cun[8].a[8]);
  cudaSafeCall(cudaFree(dun));
  free(cun);
}

__global__ void kernel()
{
  int i;
  int tid=threadIdx.x;
  gun[tid].b=tid;
  gun[tid].a=(int*)malloc(10*sizeof(int));/*this is dynamically allocated in device.*/
  for(i=0;i<10;i++)
    gun[tid].a[i]=tid+i;
}

In this program, it always comes to a "segmentation fault" in

cudaSafeCall(cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost))

Why? And what can I do to copy this data from device to host?

talonmies
  • 70,661
  • 34
  • 192
  • 269
helena
  • 23
  • 3
  • The dynamic memory allocation is supported only for latest cuda devices (since 2.0 version). If you want to compile for this architecture, add this nvcc parameter `-arch=sm_20` or `-arch=sm_21`. – Yappie Nov 30 '11 at 07:51
  • Yes,I have used this parameter,and my cun[8].b is correct.Just don't know how to pass element a in the struct. – helena Nov 30 '11 at 08:25

1 Answers1

0

The problem you have is that you are trying to use device pointer indirection in host code, which is illegal. In your example

cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost)

dun contains a device pointer, so dun[i].a implies indirection of dun[i] to read the value of a. That is not a valid host memory address and so a seg fault results. You have actually already copied the pointers to the heap memory your kernel allocated when you do this:

cudaMemcpy(cun,dun,10*sizeof(struct num),cudaMemcpyDeviceToHost);

so following that code with

int ** a_h = (int **)malloc(10 * sizeof(int *)); // to hold heap pointers
for(i=0;i<10;i++) {
   a_h[i] = cun[i].a; // save heap pointer
   cun[i].a=(int*)malloc(10*sizeof(int));                 
   cudaMemcpy(cun[i].a,a_h[i],10*sizeo(int),cudaMemcpyDeviceToHost); // copy heap to host
}

should safely copy the heap memory you allocated back to the host.

talonmies
  • 70,661
  • 34
  • 192
  • 269
  • It's work!Thanks very much.This problem has puzzled me for a long time.And the a_h is not an int* but an int**.I modified that line. It works correctly. – helena Dec 01 '11 at 01:45
  • If this solved your problem, could you please consider [accepting my answer](http://meta.stackexchange.com/a/5235/163653)? – talonmies Dec 01 '11 at 05:58
  • I'd like to ask you more questions.If element a is an int***,how to copy? – helena Dec 02 '11 at 08:19