I finally succeeded in storing a double pointer (pointer-to-pointer) in device memory so that I can use it in CUDA (see the code below), but I see that it is less performant than flattening the matrix, which is not great.
Any suggestions for saving time or memory?
I really want to use a dynamic 2D array.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdlib.h>
#include <cstdio>
#include <iostream>
// Stores the constant 3 into one element of a pointer-to-rows table.
//
// dev_c: table of row pointers. Each thread reads the row pointer at index
//        threadIdx.x and writes the element at index threadIdx.y of that row.
//
// Only threadIdx is used (no blockIdx), so every block of a launch touches the
// same elements. There is no bounds check: blockDim.x must not exceed the
// number of row pointers and blockDim.y must not exceed the row length.
__global__ void fct(int **dev_c)
{
    int *row = dev_c[threadIdx.x];
    row[threadIdx.y] = 3;
}
// Prints a diagnostic and terminates the program if a CUDA runtime call failed.
// status: value returned by the CUDA call; what: short label for the message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Builds a rows x cols matrix on the device as a table of row pointers,
// fills it with the fct kernel, copies it back and prints every element.
// Returns 0 on success; exits with EXIT_FAILURE on any allocation/CUDA error.
int main(void)
{
    const int rows = 2;
    const int cols = 2;

    // Host-side output matrix (one heap allocation per row).
    int **cc = new int*[rows];
    for (int i = 0; i < rows; i++) {
        cc[i] = new int[cols];
    }

    // Host-side table whose entries are DEVICE row pointers. It lives in host
    // memory so the host can fill it in and later copy each row back.
    int **h_c = (int **)malloc(rows * sizeof(int *));
    if (h_c == NULL) {
        fprintf(stderr, "malloc failed for row-pointer table\n");
        return EXIT_FAILURE;
    }
    // NOTE(review): one cudaMalloc per row is costly; a single contiguous
    // device buffer with row pointers set into it would keep the int** view
    // while needing only one allocation and one copy back.
    for (int i = 0; i < rows; i++) {
        checkCuda(cudaMalloc((void **)&h_c[i], cols * sizeof(int)), "cudaMalloc(row)");
    }

    // Device-side copy of the row-pointer table; this is what the kernel sees.
    int **d_c = NULL;
    checkCuda(cudaMalloc((void **)&d_c, rows * sizeof(int *)), "cudaMalloc(table)");
    checkCuda(cudaMemcpy(d_c, h_c, rows * sizeof(int *), cudaMemcpyHostToDevice),
              "cudaMemcpy(table)");

    // fct uses threadIdx.x as the row index and threadIdx.y as the column
    // index, so the block is shaped (rows, cols).
    dim3 d(rows, cols);
    fct<<<1, d>>>(d_c);
    checkCuda(cudaGetLastError(), "fct launch");          // bad launch configuration
    checkCuda(cudaDeviceSynchronize(), "fct execution");  // faults inside the kernel

    // Copy each device row back through the host-side table of device pointers.
    for (int i = 0; i < rows; i++) {
        checkCuda(cudaMemcpy(cc[i], h_c[i], cols * sizeof(int), cudaMemcpyDeviceToHost),
                  "cudaMemcpy(row)");
    }

    for (int i = 0; i < rows; i++) {
        for (int j = 0; j < cols; j++) {
            printf("(%d,%d):%d\n", i, j, cc[i][j]);
        }
    }

    // Keep the console window open until the user enters something.
    int x;
    std::cin >> x;

    // Release everything with the deallocator matching its allocator:
    // cudaFree for cudaMalloc, free for malloc, delete[] for new[].
    for (int i = 0; i < rows; i++) {
        checkCuda(cudaFree(h_c[i]), "cudaFree(row)");
    }
    checkCuda(cudaFree(d_c), "cudaFree(table)");
    free(h_c);
    for (int i = 0; i < rows; i++) {
        delete[] cc[i];
    }
    delete[] cc;

    return 0;
}