Well, sorry if this is similar to something seen before. I have the following code:
//kern.cu
#include <stdio.h>
#include <dlfcn.h>
#include <unistd.h>
extern "C"{
#include "Kernalize.h"
#include <stdio.h>
}
extern "C" {
__device__ void *dat;
__global__ void memManageDevice(void *data){
dat=data;
}
void memManageD(void *data){
printf("A%d",data);
void *d;
printf("B%d",d);
cudaMemcpy(d,&data,sizeof(data),cudaMemcpyHostToDevice);
memManageDevice<<<1,1>>>(data);
}
__global__ void MemManageC(void *r){//don't call this unless in this file.
r=dat;
}
void* memManageH(void *s){
printf("C%d",s);
void *dr;
cudaMalloc((void **)&dr, sizeof(s));
void *hr;
int size=sizeof(s);
MemManageC<<<1,1>>>(dr);
cudaMemcpy(&hr, dr, size, cudaMemcpyDeviceToHost);
printf("D%d",hr);
return hr;
}
__global__ void kernalize(void (*ptr)(void *)) {
(*ptr)(dat);
}
void Start(int d1, int d2, void (*ptr)(void *), void *data) {//TODO: make arrays as to start many kernels
int size=sizeof(data);
// void *ddata;
// bool ab=true;
// bool *coolbeans=&ab;
// memManageD<<<1,1>>>(data);
kernalize<<<d1,d2,d2*size>>>(ptr);
// data=sdata;
// coolbeans=false;
//kernalize(ptr,data);
}
}
And I compile this into a .so:
nvcc --ptxas-options=-v --compiler-options '-fPIC' -o libpar.so --shared kern.cu
Then from normal C I reference it:
typedef void (*gFunc) ();
typedef void (*sFunc) (int,int,gFunc*,void *data);
typedef void* (*hFunc) (void *);
typedef void (*dFunc) (void *);
void toBe(void *data){
data=12;
while(1){}//side-expirement, don't think it's the stem of the issue.
}
int main() {
printf("start");
sFunc fS;
hFunc hS;
dFunc dS;
void* hLibrary = dlopen("./libpar.so", RTLD_NOW | RTLD_GLOBAL);
if(hLibrary == NULL) {
fprintf(stderr, "%s\n", dlerror());
return 1;
}
int i=42;
*(void**)(&dS)=dlsym(hLibrary,"memManageD");
(void) dS(i);
sleep(1);
printf("checkpoint");
*(void**)(&fS)=dlsym(hLibrary,"Start");
(void) fS(2,2,toBe,&i);
sleep(1);
*(void**)(&hS)=dlsym(hLibrary,"memManageH");
int x=(void*) hS(&i);
printf("%d", x);
return 0;
}
As you might be able to tell through my monstrously hideous code, the function toBe is being passed to a CUDA C kernel, where upon execution it's expected to change the non-type variable pointer "data" to 12. "data" is a reference to "i" in the normal c, and starts as 42. Unfortunately, my output is 1, and not 12:
startA42B431891052checkpointC-288453328D1
which is really just garbage memory listings in between a "A42" and a "D1". I'm relatively new to CUDA C, and to C for that matter. (I spend the majority of my time with higher-level programming languages.) So the question really is where I am making a stupid mistake, either in my understanding of CUDA, my syntax with C, or my whole perception of how I envisioned this.