The documentation of cudaMalloc3D
says
The returned
cudaPitchedPtr
contains additional fields xsize
and ysize
, the logical width and height of the allocation, which are equivalent to the width and height extent parameters provided by the programmer during allocation.
However, if I run the following minimal example:
#include<stdio.h>
#include<cuda.h>
#include<cuda_runtime.h>
#include<device_launch_parameters.h>
#include<conio.h>
#define Nrows 64
#define Ncols 64
#define Nslices 16
/********************/
/* CUDA ERROR CHECK */
/********************/
// --- Credit to http://stackoverflow.com/questions/14038589/what-is-the-canonical-way-to-check-for-errors-using-the-cuda-runtime-api
// --- Reports a failed CUDA runtime call on stderr and optionally terminates the process.
// ---   code  : status value to check
// ---   file  : source file name printed in the message (callers pass __FILE__)
// ---   line  : source line number printed in the message (callers pass __LINE__)
// ---   abort : when true (the default), exit with `code` as the process exit status
// --- Does nothing when code == cudaSuccess.
// --- `file` is const-qualified because __FILE__ is a string literal, which must not be
// --- bound to a non-const char* in C++11 and later.
void gpuAssert(cudaError_t code, const char *file, int line, bool abort = true)
{
    if (code == cudaSuccess) { return; }

    fprintf(stderr, "GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
    if (abort) { exit(code); }
}
// --- Checks the status returned by a CUDA runtime call by forwarding it to gpuAssert.
// --- This is a macro rather than a function so that __FILE__ and __LINE__ expand at the
// --- call site; inside a function they would always name the wrapper's own location.
// --- The do { } while (0) wrapper makes the expansion behave as a single statement.
#define gpuErrchk(ans) do { gpuAssert((ans), __FILE__, __LINE__); } while (0)
/********/
/* MAIN */
/********/
int main() {
    // --- 3D pitched allocation; the extent width is passed in bytes (Ncols * sizeof(float))
    cudaExtent extent = make_cudaExtent(Ncols * sizeof(float), Nrows, Nslices);
    cudaPitchedPtr devPitchedPtr;
    gpuErrchk(cudaMalloc3D(&devPitchedPtr, extent));

    // --- xsize, pitch and ysize are size_t, so printing them with "%i" is undefined
    // --- behaviour. They are cast to unsigned long long and printed with "%llu".
    // --- NOTE: the value labelled "xsize in bytes" is devPitchedPtr.pitch, while
    // --- devPitchedPtr.xsize is the width that was passed in the extent, already in bytes.
    printf("xsize = %llu; xsize in bytes = %llu; ysize = %llu\n",
           (unsigned long long)devPitchedPtr.xsize,
           (unsigned long long)devPitchedPtr.pitch,
           (unsigned long long)devPitchedPtr.ysize);

    // --- Release the device allocation before exiting
    gpuErrchk(cudaFree(devPitchedPtr.ptr));
    return 0;
}
I receive:
xsize = 256; xsize in bytes = 512; ysize = 64
So, ysize
is actually equal to Nrows
, but xsize
is different from either Ncols
or xsize in bytes / sizeof(float)
.
Could you please help me understand the meaning of the xsize
and ysize
fields in the cudaPitchedPtr
of cudaMalloc3D
?
Thank you very much in advance for any help.
My system: Windows 10
, CUDA 8.0
, GT 920M
, cc 3.5
.