0

My goal is to write a generic CUDA kernel. As a first step, I am trying to use templates in the function cudaMain (not yet in the kernel - that will be my second step). cudaMain is called from my C++ main() file, and the kernel is launched from cudaMain. This works fine as long as I don't use templates. As soon as I add the template to the class and to cudaMain, I get this linker error: undefined reference to 'Cuda_class<int>::cudaMain(int, int, int*, int*, int*, int*, int*)'

Here's the code:

main.cpp:

#include "cuda_class.hpp"

// Use the wrapper with T = int and pass the tree arrays on to the CUDA side.
// The first two arguments are the number of trees and the size of the first tree.
// Linking this call requires Cuda_class<int>::cudaMain to be instantiated in a
// translation unit that is part of the build; that is the symbol named in the
// "undefined reference" error.
Cuda_class<int> p;
p.cudaMain(trees.size(), trees[0].size(), treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box);

cuda_class.hpp:

// Host-side wrapper around the CUDA code, parameterised on the element type T
// used for the x/y/z arrays and the box. The ID array is always int.
//
// cudaMain is only declared here. Because Cuda_class is a template, every
// Cuda_class<T> that callers use needs the member definition to be visible at
// the point of use, or to be explicitly instantiated in the translation unit
// that contains the definition; otherwise the call fails at link time.
template <typename T>
class Cuda_class {
public:
    // Entry point called from host code.
    //   number_of_trees, tree_size : counts supplied by the caller
    //   treeArray_x/_y/_z          : arrays of T
    //   treeArray_ID               : array of int
    //   box                        : array of T
    void cudaMain(int number_of_trees,
                  int tree_size,
                  T treeArray_x[],
                  T treeArray_y[],
                  T treeArray_z[],
                  int treeArray_ID[],
                  T box[]);
};

cuda_class.cu:

#include "cuda_class.hpp"

// Kernel: each thread works on its own tree.
// Thread t (threadIdx.x only; blockIdx is not consulted) covers the index range
// [t * tree_size, t * tree_size + tree_size - 1] and hands that range, together
// with the arrays and the literal 1, to traverseTree.
// NOTE(review): there is no guard against more threads than trees; the kernel
// does not receive the tree count - confirm the launch never exceeds it.
__global__
void insideBox(int *treeArray_x,
               int *treeArray_y,
               int *treeArray_z,
               int *treeArray_ID,
               int *box,
               int tree_size){

    // First and last index belonging to this thread's tree.
    int firstIndex = threadIdx.x * tree_size ;
    int lastIndex = firstIndex + tree_size - 1;

    traverseTree(treeArray_x, treeArray_y, treeArray_z, treeArray_ID, box,
                 1, firstIndex, lastIndex);

}
// Host entry point: selects the device, prepares device memory (elided in this
// snippet) and launches the insideBox kernel.
//
//   number_of_trees : tree count supplied by the caller (not used by the
//                     launch shown here)
//   tree_size       : forwarded to the kernel as the per-thread range length
//   treeArray_x/_y/_z, box : host arrays of T
//   treeArray_ID    : host array of int
//
// NOTE(review): insideBox takes int* arguments, so this definition presumably
// only compiles for T = int until the kernel itself is templated.
// NOTE(review): the launch uses a fixed <<<1,32>>> configuration regardless of
// number_of_trees, and no cudaGetLastError() check follows it - confirm.
template <class T>
void Cuda_class<T>::cudaMain(int number_of_trees, int tree_size, T treeArray_x[], T treeArray_y[], T treeArray_z[], int treeArray_ID[], T box[]){

    cudaSetDevice(MYDEVICE);
    // do something allocate memory etc

    //launch kernel
    insideBox<<<1,32>>>(d_treeArray_x, d_treeArray_y, d_treeArray_z, d_treeArray_ID, d_box, tree_size);
    //do some other stuff
}

// Explicit instantiation. The member template is defined in this translation
// unit only, so other translation units that include just the header cannot
// instantiate it themselves. Without this line no code for
// Cuda_class<int>::cudaMain is emitted and a caller fails to link with
// "undefined reference to Cuda_class<int>::cudaMain(...)".
// Add one such line per element type that callers use.
template class Cuda_class<int>;
m.s.
  • 16,063
  • 7
  • 53
  • 88
aces
  • 185
  • 1
  • 1
  • 10
  • Yes this might be a duplicate. But I can't `#include "cuda_class.cu"` in the main. How can I solve this? – aces Jun 23 '15 at 09:16
  • `cudaMain` must be implemented in `cuda_class.hpp` – m.s. Jun 23 '15 at 09:19
  • And where do I implement the kernel? cudaMain calls the kernel. – aces Jun 23 '15 at 09:22
  • Either make the kernel a template and put it in `cuda_class.hpp` as well, or declare `__global__ void insideBox(int*,int*,int*,int*,int*,int);` in the header but implement it in the .cu file. – m.s. Jun 23 '15 at 09:34
  • Do you actually instantiate the templates in the translation unit where they are defined? – talonmies Jun 23 '15 at 14:19

0 Answers0