I'm trying to create a struct that will hold both the host and device arrays in one place and should reside on the host. I later intend expand it to be a element of a linked list. The basic struct looks like this:
typedef struct Data{
double *h;
double *d;
} Data;
Where *h points to an array of doubles on the host and *d points to an array of doubles on the device.
There are various answers on SO about copying whole structs to the the device (CUDA cudaMemcpy Struct of Arrays) but none of them quite do what I need. I have the following code but keep getting illegal memory access errors.
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include "cuda.h"
/*
* CUDA Error stuff
*/
static void HandleError( cudaError_t err,
const char *file,
int line ) {
if (err != cudaSuccess) {
printf( "%s in %s at line %d\n", cudaGetErrorString( err ),
file, line );
exit( EXIT_FAILURE );
}
}
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))
#define HANDLE_NULL( a ) {if (a == NULL) { \
printf( "Host memory failed in %s at line %d\n", \
__FILE__, __LINE__ ); \
exit( EXIT_FAILURE );}}
//malloc error code
int errMsg(const char *message, int errorCode)
{
printf("%s\n", message);
return errorCode;
}
typedef struct Data{
double *h;
double *d;
} Data;
__global__ void kernel(Data *d)
{
int tid = blockIdx.x * blockDim.x + threadIdx.x;
if(tid<100){
d->d[tid] = 2;
}
}
int main()
{
Data *d;
d = (Data*)malloc(sizeof(Data));
d->h = (double*)malloc(sizeof(double)*100);
HANDLE_ERROR( cudaMalloc((void**) &(d->d), 100*sizeof(double)) );
for(int i=0; i<100; i++){
d->h[i] = i;
}
HANDLE_ERROR( cudaMemcpy(d->d, d->h, 100*sizeof(double), cudaMemcpyHostToDevice) );
printf("%f\n", d->h[1]);
kernel<<<1, 102>>>(d);
printf("done\n");
{
cudaError_t cudaerr = cudaDeviceSynchronize();
if (cudaerr != cudaSuccess)
printf("kernel launch failed with error \"%s\"->\n",
cudaGetErrorString(cudaerr));
}
HANDLE_ERROR( cudaMemcpy(d->h, d->d, 100*sizeof(double), cudaMemcpyDeviceToHost) );
printf("%f\n", d->h[99]);
return 0;
}
The output I get is:
1.000000
done
kernel launch failed with error "an illegal memory access was encountered"->
an illegal memory access was encountered in linkedListGPU.cu at line 77
I suspect I have just messed up my pointers a bit. The error handling code is from the Wiley introduction to CUDA book, if there code is not allowed on here I'll remove it.
Thanks.