0

I have the following:

struct LR { double eps_dielect; 
        double sgm_conductivity; 
        double eno_ns_surfref;
        double frq_mhz; 
        double conf; 
        double rel;
        double erp;
        int radio_climate;  
        int pol;
        float antenna_pattern[361][1001];
          } LR;

I need to pass LR.antenna_pattern into a function, allocate the memory in a CUDA device then copy it. The float** type should represent LR.antenna_pattern[361][1001] just fine but I don't know how to instantiate the float** variable so that it is a pointer to LR.antenna_pattern

I try float** antennaPattern = (void**)&LR.antenna_pattern but it doesn't work. How do I create a pointer to LR.antenna_pattern?

Sean
  • 537
  • 1
  • 6
  • 15
  • 2
    I don't see any references here. And [`float**` is not compatible with `float[W][H]`](http://stackoverflow.com/questions/7138720/why-cant-char-be-the-return-type-of-the-following-function-in-c/7139014#7139014). – Lightness Races in Orbit Nov 27 '12 at 19:06
  • Unless this is allocated on the heap, he's *trying* to overflow the stack, because *damn*. – WhozCraig Nov 27 '12 at 19:13
  • You can force it with `float **p = (float**)LR.antenna_pattern;` But good luck keeping your core in-tact. –  Nov 27 '12 at 19:43

1 Answers1

1

One approach is to flatten your 2D array and handle it in a 1D fashion with pointer arithmetic to handle the row and column dimensions. First of all in your struct definition, replace the antenna_pattern element with:

struct LR { 
.
.
float *antenna_pattern;
} LR;

Then you will need to do a host-side malloc to allocate space:

#define COL 1001
#define ROW 361
#define DSIZE (ROW*COL)

LR.antenna_pattern = (float *)malloc(DSIZE*sizeof(float));

And a device side cuda malloc:

float *d_antenna_pattern;
cudaMalloc((void **) &d_antenna_pattern, DSIZE*sizeof(float));

The copy to the device looks like:

cudaMemcpy(d_antenna_pattern, LR.antenna_pattern, DSIZE*sizeof(float), cudaMemcpyHostToDevice);

When you want to reference into these arrays, you will have to do pointer arithmetic like:

float my_val_xy = ap[(x*COL)+y];  // to access element at [x][y] on the device

float my_val_xy = LR.antenna_pattern[(x*COL)+y]; // on the host

If you want to maintain the 2D array subscripts throughout, you can do this with an appropriate typedef. For an example, see the first code sample in my answer to this question. To diagram this out, you would need to start with a typedef:

#define COL 1001
#define ROW 361
#define DSIZE (ROW*COL)

typedef float aParray[COL];

and modify your structure definition:

struct LR { 
.
.
aParray *antenna_pattern;
} LR;

The host side malloc would look like:

LR.antenna_pattern = (aParray *)malloc(DSIZE*sizeof(float));

The device side cuda malloc would look like:

aParray *d_antenna_pattern;
cudaMalloc((void **) &d_antenna_pattern, DSIZE*sizeof(float));

The copy to the device looks like:

cudaMemcpy(d_antenna_pattern, LR.antenna_pattern, DSIZE*sizeof(float), cudaMemcpyHostToDevice);

The device kernel definition will need a function parameter like:

__global__ void myKernel(float ap[][COL]) {

Then inside the kernel you can access an element at x,y as:

float my_val_xy = ap[x][y];

Now in response to a follow-up question asking what to do if LR cannot be changed, here is a complete sample code which combines some of these ideas without modifying the LR structure:

#include<stdio.h>

// for cuda error checking
#define cudaCheckErrors(msg) \
    do { \
        cudaError_t __err = cudaGetLastError(); \
        if (__err != cudaSuccess) { \
            fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
                msg, cudaGetErrorString(__err), \
                __FILE__, __LINE__); \
            fprintf(stderr, "*** FAILED - ABORTING\n"); \
            return 1; \
        } \
    } while (0)



struct LR {
  int foo;
  float antenna_pattern[361][1001];
  } LR;

__global__ void mykernel(float ap[][1001]){

  int tid = threadIdx.x + (blockDim.x*blockIdx.x);
  float myval = 0.0;

  if (tid == 0){
    for (int i=0; i<361; i++)
      for (int j=0; j<1001; j++)
         ap[i][j] = myval++;
  }
}


int main(){

  typedef float aParray[1001];

  aParray *d_antenna_pattern;
  cudaMalloc((void **) &d_antenna_pattern, (361*1001)*sizeof(float));
  cudaCheckErrors("cudaMalloc fail");
  float *my_ap_ptr;
  my_ap_ptr = &(LR.antenna_pattern[0][0]);

  for (int i=0; i< 361; i++)
    for (int j=0; j<1001; j++)
      LR.antenna_pattern[i][j] = 0.0;
  cudaMemcpy(d_antenna_pattern, my_ap_ptr, (361*1001)*sizeof(float), cudaMemcpyHostToDevice);
  cudaCheckErrors("cudaMemcpy fail");
  mykernel<<<1,1>>>(d_antenna_pattern);
  cudaCheckErrors("Kernel fail");

  cudaMemcpy(my_ap_ptr, d_antenna_pattern, (361*1001)*sizeof(float), cudaMemcpyDeviceToHost);
  cudaCheckErrors("cudaMemcpy 2 fail");
  float myval = 0.0;
  for (int i=0; i<361; i++)
    for (int j=0; j<1001; j++)
      if (LR.antenna_pattern[i][j] != myval++) {printf("mismatch at offset x: %d y: %d actual: %f expected: %f\n", i, j, LR.antenna_pattern[i][j], --myval); return 1;}
  printf("Results match!\n");
  return 0;
}

If you prefer to use the flattened method, replace the d_antenna_pattern definition with:

float *d_antenna_pattern;

And change the kernel function parameter correspondingly to:

__global__ void mykernel(float *ap){

Then access using the pointer arithmetic method in the kernel:

ap[(i*1001)+j] = myval++;
Community
  • 1
  • 1
Robert Crovella
  • 143,785
  • 11
  • 213
  • 257
  • I'm writing all the cuda code in a separate, static library because the original program is ~8000 lines of code. If I were to change struct LR I'd lose an immense amount of time changing all the other code to use it safely and introduce even more risk to the project. Is there a way to do it without changing struct LR? – Sean Nov 27 '12 at 20:43
  • 1
    @Sean I've edited my answer with an example that shows how to do it without changing LR. For the long code sample I've added, you should be able to grab that, compile and test it. It works for me. – Robert Crovella Nov 27 '12 at 21:46
  • sweet thank you so much! Just what I was looking for! BTW the & reference operator used with the brackets &(LR.antenna_pattern[0][0]) was exactly what I wondering how to do. – Sean Nov 27 '12 at 22:44