I'm attempting to copy a 2d double pointer array from the system memory to my GPU memory using cudaMalloc. I tried using the solution to this question however a) it doesn't appear to be working and b) the solution was designed for jagged arrays where each row has a variable width, which results in a more inefficient solution (o(n) vs the theoretically possible o(1) via a single malloc call.
This program is designed to copy the contents of **src to **dst and return the destination array back to system memory. Could someone point out why this doesn't work? I'm a n00b at c and cuda so appreciate the help.
#include <stdio.h>
#include <assert.h>
#include <cuda.h>
#include <cuda_runtime.h>
__global__
void copy(double **src, double **dst, int width, int height)
{
for (int i = 0; i < width; i++) {
for (int j = 0; j < height; j++) {
dst[i][j] = src[i][j];
}
}
}
int main(int argc, char *argv[]){
int width = 3;
int height = 5;
double **src_ptr;
double **cuda_src;
double **dst_ptr;
double **cuda_dst;
src_ptr = (double **)malloc(width * sizeof(double*));
dst_ptr = (double **)malloc(width * sizeof(double*));
cudaMalloc(cuda_src, width * sizeof(double*));
cudaMalloc(cuda_dst, width * sizeof(double*));
for(int i = 0; i<width;i++){
src_ptr[i] = (double *)malloc(height * sizeof *src_ptr[i]);
dst_ptr[i] = (double *)malloc(height * sizeof *dst_ptr[i]);
cudaMalloc((void ** ) &cuda_src[i], height * sizeof(double));
cudaMalloc((void ** ) &cuda_dst[i], height * sizeof(double));
for(int j=0;j<height;j++){
src_ptr[i][j] = (double)(rand() % 1000) / 1000;
dst_ptr[i][j] = 0.;
}
}
cudaMemcpy(cuda_src, src_ptr, width * sizeof(double*), cudaMemcpyHostToDevice);
copy<<<1,1>>>(cuda_src, cuda_dst, width, height);
cudaMemcpy(dst_ptr, cuda_dst, width * sizeof(double*), cudaMemcpyDeviceToHost);
printf("Source: \n");
for (int i = 0; i < width; i++) {
for (int j = 0; j < height; j++) {
printf("%f ", src_ptr[i][j]);
}
printf("\n");
}
printf("\n\nDest:\n");
for (int i = 0; i < width; i++) {
for (int j = 0; j < height; j++) {
printf("%f ", dst_ptr[i][j]);
}
printf("\n");
}
}
Expected output:
Source:
0.383000 0.886000 0.777000 0.915000 0.793000
0.335000 0.386000 0.492000 0.649000 0.421000
0.362000 0.027000 0.690000 0.059000 0.763000
Dest:
0.383000 0.886000 0.777000 0.915000 0.793000
0.335000 0.386000 0.492000 0.649000 0.421000
0.362000 0.027000 0.690000 0.059000 0.763000
Actual output:
Source:
0.383000 0.886000 0.777000 0.915000 0.793000
0.335000 0.386000 0.492000 0.649000 0.421000
0.362000 0.027000 0.690000 0.059000 0.763000
Dest:
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000
0.000000 0.000000 0.000000 0.000000 0.000000