4

I'm wondering if there's anyway to pass a pointer-to-member to a device function in CUDA. Since the pointer is really just relative to the struct/class it doesn't seem like there should be any reason it wouldn't work but I can't seem to get the code to compile.

#include <stdio.h>


struct S {
    int F1;
    int F2;
    int F3;
};

__device__ S x;

__global__ void initialize_S() {
    x.F1 = 100;
    x.F2 = 200;
    x.F3 = 300;
}

__global__ void print_S(int S::* m) {
    printf("val: %d\n", x.*m);
}

int main() {

    initialize_S<<<1, 1>>>();
    print_S<<<1, 1>>>(&S::F1);

    cudaDeviceSynchronize();
}

When compiling I get the following error with NVCC v5.5

/tmp/tmpxft_000068a5_00000000-16_ptm.o: In function `main':
tmpxft_000068a5_00000000-3_ptm.cudafe1.cpp:(.text+0xcf): undefined reference to `print_S(int S::*)'
/tmp/tmpxft_000068a5_00000000-16_ptm.o: In function `__device_stub__Z7print_SM1Si(long)':
tmpxft_000068a5_00000000-3_ptm.cudafe1.cpp:(.text+0x17f): undefined reference to `print_S(int S::*)'
tmpxft_000068a5_00000000-3_ptm.cudafe1.cpp:(.text+0x184): undefined reference to `print_S(int S::*)'
collect2: error: ld returned 1 exit status

Any help would be appreciated. Thanks!

EDIT: after traipsing through the code genrerated by NVCC it actually looks like it's generating it wrong:

extern void __device_stub__Z7print_SM1Si(long);
void __device_stub__Z7print_SM1Si( long __par0) { if (cudaSetupArgument((void *)(char *)&__par0, sizeof(__par0), (size_t)0UL) !=
cudaSuccess) return; { volatile static char *__f __attribute__((unused)); __f = ((char *)((void ( *)(long))print_S)); (void)cudaL
aunch(((char *)((void ( *)(long))print_S))); }; }
# 18 "ptm.cu"
void print_S( long __cuda_0)
# 18 "ptm.cu"
{__device_stub__Z7print_SM1Si( __cuda_0);

}

By patching the generated code to convert these "long"s to "int S::*"s it compiles and functions correctly.

 extern void __device_stub__Z7print_SM1Si(int S::*);
 void __device_stub__Z7print_SM1Si(int S::* __par0) { if (cudaSetupArgument((void *)(char *)&__par0, sizeof(__par0), (size_t)0UL)
 != cudaSuccess) return; { volatile static char *__f __attribute__((unused)); __f = ((char *)((void ( *)(int S::*))print_S)); (voi
 d)cudaLaunch(((char *)((void ( *)(int S::*))print_S))); }; }
 # 18 "ptm.cu"
 void print_S(int S::* __cuda_0)
 # 18 "ptm.cu"
 {__device_stub__Z7print_SM1Si( __cuda_0);

 }
thchittenden
  • 85
  • 1
  • 5
  • F1-3 are int, not function pointers in your code... – László Papp Apr 21 '14 at 14:58
  • Yes, and I'm trying to pass a pointer to a data member to the kernel. See http://stackoverflow.com/questions/670734/c-pointer-to-class-data-member – thchittenden Apr 21 '14 at 15:08
  • @TomChittenden: This looks like a genuine limitation in the CUDA C++ front end. Could you add your findings as a short answer? Later you will be able to accept it and get the question off the unanswered list (I will upvote it too). I would also recommend making a bug report to NVIDIA, this looks like something they should fix. – talonmies Apr 21 '14 at 17:19

2 Answers2

3

This appears to be a limitation of nvcc as already indicated elsewhere. I have filed a bug with the compiler team. They are aware of the issue. I don't have any further information about a possible update or schedule.

A possible workaround was suggested as follows, for Linux/MacOS only:

#include <stdio.h>

template <typename T>
struct dummy {
  T inner;
  T __host__ __device__ get(void) { return inner; };
  __host__ __device__ dummy(T in) : inner(in) { };
};



struct S {
    int F1;
    int F2;
    int F3;
};

__device__ S x;

__global__ void initialize_S() {
    x.F1 = 100;
    x.F2 = 200;
    x.F3 = 300;
}

__global__ void print_S(dummy<int S::*> m) {
    printf("val: %d\n", x.*(m.get()));
}

int main() {

    initialize_S<<<1, 1>>>();
    print_S<<<1, 1>>>(dummy<int S::*>(&S::F1));

    cudaDeviceSynchronize();
}

I'm not able to comment on the usefulness of the above. The above seems to compile and run correctly on CUDA 6.0

Also, usage of pointer-to-member appears to work correctly in device code. The limitation described here is specific to its usage when passed as a __global__ function parameter.

Robert Crovella
  • 143,785
  • 11
  • 213
  • 257
2

It looks like this is a limitation of NVCC at the moment. I've posted in the NVIDIA dev forums so hopefully this gets resolved!

thchittenden
  • 85
  • 1
  • 5