0

Well, sorry if this is similar to something seen before. I have the following code:

//kern.cu
#include <stdio.h>
#include <dlfcn.h>
#include <unistd.h>
extern "C"{
#include "Kernalize.h"
#include <stdio.h>
}

extern "C" {
    __device__ void *dat;
    __global__ void memManageDevice(void *data){
        dat=data;
    }
    void memManageD(void *data){
        printf("A%d",data);
        void *d;
        printf("B%d",d);
        cudaMemcpy(d,&data,sizeof(data),cudaMemcpyHostToDevice);
        memManageDevice<<<1,1>>>(data);
    }
    __global__ void MemManageC(void *r){//don't call this unless in this file.
        r=dat;
    }
    void* memManageH(void *s){
        printf("C%d",s);
        void *dr;
        cudaMalloc((void **)&dr, sizeof(s));
        void *hr;
        int size=sizeof(s);
        MemManageC<<<1,1>>>(dr);
        cudaMemcpy(&hr, dr, size, cudaMemcpyDeviceToHost);
        printf("D%d",hr);
        return hr;
    }
    __global__ void kernalize(void (*ptr)(void *)) {
        (*ptr)(dat);
    }
    void Start(int d1, int d2, void (*ptr)(void *), void *data) {//TODO: make arrays as to start many  kernels
        int size=sizeof(data);
        // void *ddata;
        // bool ab=true;
        // bool *coolbeans=&ab;
        // memManageD<<<1,1>>>(data);
        kernalize<<<d1,d2,d2*size>>>(ptr);
        // data=sdata;
        // coolbeans=false;
        //kernalize(ptr,data);
    }
}

And I compile this into a .so:

nvcc --ptxas-options=-v --compiler-options '-fPIC' -o libpar.so --shared kern.cu

Then from normal C I reference it:

typedef void (*gFunc) ();
typedef void (*sFunc) (int,int,gFunc*,void *data);

typedef void* (*hFunc) (void *);
typedef void (*dFunc) (void *);
void toBe(void *data){
        data=12;
        while(1){}//side-expirement, don't think it's the stem of the issue.
}
int main() {
    printf("start");
    sFunc fS;
    hFunc hS;
    dFunc dS;
    void* hLibrary = dlopen("./libpar.so", RTLD_NOW | RTLD_GLOBAL);
    if(hLibrary == NULL) {
        fprintf(stderr, "%s\n", dlerror());
        return 1;
    }
    int i=42;
    *(void**)(&dS)=dlsym(hLibrary,"memManageD");
    (void) dS(i);
    sleep(1);
    printf("checkpoint");
    *(void**)(&fS)=dlsym(hLibrary,"Start");
    (void) fS(2,2,toBe,&i);
    sleep(1);
    *(void**)(&hS)=dlsym(hLibrary,"memManageH");
    int x=(void*) hS(&i);
    printf("%d", x);

    return 0;
}

As you might be able to tell through my monstrously hideous code, the function toBe is being passed to a CUDA C kernel, where upon execution it's expected to change the non-type variable pointer "data" to 12. "data" is a reference to "i" in the normal c, and starts as 42. Unfortunately, my output is 1, and not 12:

 startA42B431891052checkpointC-288453328D1

which is really just garbage memory listings in between a "A42" and a "D1". I'm relatively new to CUDA C, and to C for that matter. (I spend the majority of my time with higher-level programming languages.) So the question really is where I am making a stupid mistake, either in my understanding of CUDA, my syntax with C, or my whole perception of how I envisioned this.

Liverwurst
  • 55
  • 5
  • 1
    you can't pass a pointer to a host-code function to a device kernel to execute on the device. Your "normal" C code is host code. It is compiled by the host compiler into an x86 executable entry point. That bare pointer to that entry point is completely useless in device code. GPUs don't execute x86 code. If you did CUDA error checking, you'd discover that things are going wrong in the kernel. There are [many examples](https://stackoverflow.com/questions/31057870/passing-host-function-as-a-function-pointer-in-global-or-device-function/31058123#31058123) on the `cuda` tag for device pointers. – Robert Crovella Jun 21 '18 at 01:07

1 Answers1

0

Here's my understanding of why the outputs are seen:

  • A42 is working as expected because i is equal to 42

  • B431891052 because the pointer void* d is not initialized, and may be containing a garbage value, and the print statement printf("B%d",d) just prints the lower 32 bits of it.

  • C-288453328 because the statement printf("C%d",s) prints the lower 32 bits of variable i. In 32 bits the number -288453328 corresponds to hexadecimal number 0xEECE8D30; I think it's the lower 32 bits of the stack address 0x7FFFEECE8D30.

  • D1 because the hr will contain whatever first 8 bytes dr is point to. It is uninitialized, and in this example the value it points to happens to be 1.

I think to assign have D12 the function memManageC may need to be changed to the following:

__global__ void MemManageC(int *r) {
    *r = dat;
}

Thus, the content pointed to by r will have the value of dat. Without the de-reference, the function only modifies the argument that's passed in, which is an no-op and may get optimized away by the compiler.

To have D12 also requires toBe to correctly set the value of dat which seems to require some more code changes. toBe is a host-side function, and a usage of a host-side function in __global__ or __kernel__ functions can be found in this answer: Passing Host Function as a function pointer in __global__ OR __device__ function in CUDA

nitroglycerine
  • 161
  • 1
  • 8