This question is an extension of this question and related to this question.
[Q1] Do I need to cast to (void**) when calling cudaMalloc
on a struct member? Example (please also see the questions embedded in the code below):
The structure:
typedef struct {
    int a;
    int *b;
} Matrix;
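For reference, this is the cast [Q1] is about. A minimal standalone sketch of the two call styles I keep seeing (the buffer names and sizes are just illustrative, not from my real code):

// Style 1: explicit cast of the pointer-to-pointer to (void**)
int *d_buf1;
cudaMalloc((void**)&d_buf1, 100 * sizeof(int));

// Style 2: no cast, passing the int** directly
int *d_buf2;
cudaMalloc(&d_buf2, 100 * sizeof(int));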
The main function for allocating and copying to device:
int main(void)
{
    int rows, cols, numMat = 2;
    //[Q2] What would be the problem of not allocating (numMat * sizeof(Matrix)) here?
    //For example, allocating just sizeof(Matrix)?
    Matrix *data = (Matrix*)malloc(numMat * sizeof(Matrix));
    // ... Successfully read from file into "data" ...
    //[Q3] Do we really need to copy "data" into a second host buffer?
    //[A3] Not necessary
    Matrix *h_data = (Matrix*)malloc(numMat * sizeof(Matrix));
    memcpy(h_data, data, numMat * sizeof(Matrix));
    // ... Copy the matrix data to the gpu ...
    //[Q4] Do we need to cast (void**)&(h_data->a)? 'a' is not a pointer.
    //[A4] An int cannot be copied in this fashion
    // cudaMalloc(&(h_data->a), rows*cols*sizeof(int));
    // cudaMemcpy(h_data->a, data->a, rows*cols*sizeof(int), cudaMemcpyHostToDevice);
    //[Q5] Do we need to cast (void**)&(h_data->b)? 'b' is a pointer.
    cudaMalloc(&(h_data->b), rows*cols*sizeof(int));
    cudaMemcpy(h_data->b, data->b, rows*cols*sizeof(int), cudaMemcpyHostToDevice);
    // ... Copy the "meta" data to the gpu ...
    //[Q6] Can we just copy h_data instead? Why create another pointer "d_data"?
    //[A6] Yes
    Matrix *d_data;
    //[Q7] Wouldn't we need to cast (void**)&d_data?
    cudaMalloc(&d_data, numMat*sizeof(Matrix));
    //[Q8] h_data now exists on both host and device. Can we just copy "data" to the device instead?
    cudaMemcpy(d_data, h_data, numMat*sizeof(Matrix), cudaMemcpyHostToDevice);
    // ... Do other things ...
}
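To make [Q8] more concrete, this is how I currently understand the pointers after the code above (my own reading, which may well be wrong):

// data->b   : still points to the host memory read from the file
// h_data->b : now holds a device address (the cudaMalloc above overwrote it)
// d_data    : a device copy of the h_data array, so its b field holds the same device address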
Ultimately, we just want to pass the Matrix array to the kernel as a pointer:
// Kernel call
doThings<<<dimGrid, dimBlock>>>(d_data);
The kernel definition:
__global__ void doThings(Matrix *matrices)
{
    matrices->a = ...;
    matrices->b = ...;
}
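For context, this is roughly how I intend to access the matrices inside the kernel. Just a sketch of the intent, assuming one thread per matrix; doThingsSketch and the extra numMat parameter are made up for illustration:

__global__ void doThingsSketch(Matrix *matrices, int numMat)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < numMat) {
        matrices[i].a = 42;     // write the scalar member
        matrices[i].b[0] = 42;  // write through the pointer member (device memory)
    }
}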
Thanks in advance for your time and effort in helping me with my doubts!