I am trying to do a simple test with `cudaMemcpy3D` using CUDA 5.5. I have searched around, found different examples, and read the relevant Runtime API documentation, but I can't figure out what I am doing wrong. The following code compiles just fine, but when I run it I get a segmentation fault on the `cudaMemcpy3D` call. I tried running it under `cuda-gdb`, but couldn't get any useful information out of it to tell me what is wrong (maybe because I'm not that familiar with `gdb`/`cuda-gdb` usage). Any help in figuring out where my error is would be greatly appreciated.
#include <cstdio>
#include <cuda_runtime.h>
int main() {
static const size_t NX = 60;
static const size_t NY = 60;
static const size_t NZ = 60;
float* h_data = new float[NX * NY * NZ];
for(unsigned int i = 0; i < NX * NY * NZ; ++i) {
h_data[i] = static_cast<float>(i);
}
float* d_data = 0;
cudaPitchedPtr dstPtr = make_cudaPitchedPtr((void**)&d_data, NX * sizeof(float), NX, NY);
printf("cudaPitchedPtr: %s\n", cudaGetErrorString(cudaGetLastError()));
cudaExtent extent = make_cudaExtent(NX * sizeof(float), NY, NZ);;
cudaMalloc3D(&dstPtr, extent);
printf("cudaMalloc3D: %s\n", cudaGetErrorString(cudaGetLastError()));
cudaMemset3D(dstPtr, 0, extent);
printf("cudaMemset3D: %s\n", cudaGetErrorString(cudaGetLastError()));
cudaPitchedPtr srcPtr = make_cudaPitchedPtr((void**)&h_data, NX * sizeof(float), NX, NY);
printf("cudaPitchedPtr: %s\n", cudaGetErrorString(cudaGetLastError()));
cudaMemcpy3DParms params = {0};
params.srcPtr = srcPtr;
params.dstPtr = dstPtr;
params.extent = extent;
params.kind = cudaMemcpyHostToDevice;
cudaMemcpy3D(¶ms);
printf("cudaMemcpy3D: %s\n", cudaGetErrorString(cudaGetLastError()));
delete[] h_data;
return 0;
}