copy data which is allocated in device from device to host

Question

I have a pointer which is dynamically allocated in device,then how can I copy it from device to host.

#include <stdio.h>

#define cudaSafeCall(call){   \
  cudaError err = call;       \
  if(cudaSuccess != err){     \
    fprintf(stderr, "%s(%i) : %s.\n", __FILE__, __LINE__, cudaGetErrorString(err));   \
    exit(EXIT_FAILURE);       \
}}
#define cudaCheckErr(errorMessage) {    \
  cudaError_t err = cudaGetLastError(); \
  if(cudaSuccess != err){               \
    fprintf(stderr, "%s(%i) : %s : (code %d) %s.\n", __FILE__, __LINE__, errorMessage, err, cudaGetErrorString(err)); \
    exit(EXIT_FAILURE);                 \``
}}

struct num{  
int *a;
int b;
};

__device__ struct num *gun;
int main()
{
  int i;
  char c[100];
  struct num *dun,*cun; 
  cudaSafeCall(cudaSetDevice(1));
  cun=(struct num*)malloc(10*sizeof(struct num));  
  cudaSafeCall(cudaMalloc(&dun,10*sizeof(struct num)));
  cudaSafeCall(cudaMemcpyToSymbol(gun,&dun,sizeof(struct num*)));
  __global__ void kernel();
  kernel<<<1,10>>>();
  cudaSafeCall(cudaDeviceSynchronize());  
  cudaCheckErr(c);
  cudaSafeCall(cudaMemcpyFromSymbol(&dun,gun,sizeof(struct num*)));
  cudaSafeCall(cudaMemcpy(cun,dun,10*sizeof(struct num),cudaMemcpyDeviceToHost));
  for(i=0;i<10;i++) cudaSafeCall(cudaMalloc(&csu[i].a,10*sizeof(int)));  
  cudaSafeCall(cudaGetSymbolAddress((void**)csu[0].a,(void**)gun[0].a));  
  for(i=0;i<10;i++)   cun[i].a=(int*)malloc(10*sizeof(int));
  for(i=0;i<10;i++)   cudaSafeCall(cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost));
  printf("%d ",cun[8].b);
  printf("%d ",cun[8].a[8]);
  cudaSafeCall(cudaFree(dun));
  free(cun);
}

__global__ void kernel()
{
  int i;
  int tid=threadIdx.x;
  gun[tid].b=tid;
  gun[tid].a=(int*)malloc(10*sizeof(int));/*this is dynamically allocated in device.*/
  for(i=0;i<10;i++)
    gun[tid].a[i]=tid+i;
}

In this program, it always comes to a "segmentation fault" in

cudaSafeCall(cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost))

Why? And what can I do to copy this data from device to host?

The dynamic memory allocation is supported only for latest cuda devices (since 2.0 version). If you want to compile for this architecture, add this nvcc parameter `-arch=sm_20` or `-arch=sm_21`. — Yappie, Nov 30 '11 at 07:51
Yes,I have used this parameter,and my cun[8].b is correct.Just don't know how to pass element a in the struct. — helena, Nov 30 '11 at 08:25

talonmies · Accepted Answer · 2011-12-01T05:56:51.310

0

The problem you have is that you are trying to use device pointer indirection in host code, which is illegal. In your example

cudaMemcpy(cun[i].a,dun[i].a,10*sizeof(int),cudaMemcpyDeviceToHost)

dun contains a device pointer, so dun[i].a implies indirection of dun[i] to read the value of a. That is not a valid host memory address and so a seg fault results. You have actually already copied the pointers to the heap memory your kernel allocated when you do this:

cudaMemcpy(cun,dun,10*sizeof(struct num),cudaMemcpyDeviceToHost);

so following that code with

int ** a_h = (int **)malloc(10 * sizeof(int *)); // to hold heap pointers
for(i=0;i<10;i++) {
   a_h[i] = cun[i].a; // save heap pointer
   cun[i].a=(int*)malloc(10*sizeof(int));                 
   cudaMemcpy(cun[i].a,a_h[i],10*sizeo(int),cudaMemcpyDeviceToHost); // copy heap to host
}

should safely copy the heap memory you allocated back to the host.

edited Dec 01 '11 at 05:56

answered Nov 30 '11 at 12:21

talonmies

70,661
34
192
269

It's work!Thanks very much.This problem has puzzled me for a long time.And the a_h is not an int* but an int**.I modified that line. It works correctly. – helena Dec 01 '11 at 01:45
If this solved your problem, could you please consider [accepting my answer](http://meta.stackexchange.com/a/5235/163653)? – talonmies Dec 01 '11 at 05:58
I'd like to ask you more questions.If element a is an int***,how to copy? – helena Dec 02 '11 at 08:19

copy data which is allocated in device from device to host

1 Answers1

Linked