cudaMemcpyAsync only one member of a struct from device to host

Question

I have a struct with multiple members and I want to do some operations on parts of the member by GPUs. To make the size of communication as small as possible, I hope to copy back only those members which have been modified. Can cuda do that?

struct nodeInfo;

typedef struct nodeInfo
{
  int x;
  int y;
}nodeProp;

int main(int argc, char* argv[]){
  int ngpus;
  CHECK(cudaGetDeviceCount(&ngpus));
  cudaStream_t stream[ngpus];

  nodeProp *Nodes;
  nodeProp *gpuNodes[ngpus];

  int rankSize = 10;
  int deviceSize = rankSize/ngpus;

  CHECK(cudaMallocHost((void**)&Nodes,rankSize*sizeof(nodeProp)));

  for(int i = 0; i < ngpus; i++)
    {
      cudaSetDevice(i);
      cudaStreamCreate(&stream[i]);
      CHECK(cudaMalloc((void**)&gpuNodes[i],deviceSize*sizeof(nodeProp)));
      CHECK(cudaMemcpyAsync(gpuNodes[i],Nodes+i*deviceSize,deviceSize*sizeof(nodeProp),cudaMemcpyHostToDevice,stream[i]));
    }

  for(int i = 0; i < ngpus; i++)
    {
      cudaSetDevice(i);
      kernel_x_Operation<<<grid_size,block_size,0,stream[i]>>>(gpuNodes[i]);//Some operation on gpuNodes.x

     //How to write the memcpy function? Can I just copy one member of the struct back?
      CHECK((void*)cudaMemcpyAsync((Nodes+i*deviceSize)->x, gpuNodes[i]->x), sizeof(int)*deviceSize,cudaMemcpyDeviceToHost,stream[i]));

      cudaDeviceSynchronize();
   }
}

score 2 · Accepted Answer · edited May 23 '17 at 11:44

2

No, you can't do that. But you can achieve something similar by laying your data out as a Struct of Arrays instead of an Array of Structs.

Have a look at Structure of Arrays vs Array of Structures in cuda to see how this might even improve performance.

edited May 23 '17 at 11:44

Community

1
1

answered Oct 27 '15 at 19:28

m.s.

16,063
7
53
88

Thanks. I'll check it out. – Jackie Oct 27 '15 at 19:34

cudaMemcpyAsync only one member of a struct from device to host

1 Answers1