I have problem trying to build basic cuda/thrust code to get more familiar with GPU programming. I am probably not compiling it properly so I would like to know what I am doing wrong?
I am building using the following instructions
nvcc -c gpu_functions.cu
nvcc gpu_functions.o gpu_test.cu -o gpu_test
However I get a linking error:
jim@pezbox:~/dev/analytics/src$ nvcc gpu_functions.o gpu_test.cu -o gpu_test
/tmp/tmpxft_00002383_00000000-14_gpu_test.o: In function `main':
tmpxft_00002383_00000000-3_gpu_test.cudafe1.cpp:(.text+0x6e): undefined reference to `void add<thrust::device_vector<int, thrust::device_malloc_allocator<int> > >(thrust::device_vector<int, thrust::device_malloc_allocator<int> > const&, thrust::device_vector<int, thrust::device_malloc_allocator<int> > const&, thrust::device_vector<int, thrust::device_malloc_allocator<int> >&)'
collect2: ld returned 1 exit status
I have three files:
- gpu_functions.h (the header function for the GPU functions)
- gpu_functions.cu (the implementation for the GPU functions)
- gpu_test.cu (the main loop that calls my defined GPU functions)
gpu_functions.h
template<typename Vector>
void add(const Vector& in1, const Vector& in2, Vector& out);
gpu_functions.cu
#include "gpu_functions.h"
#include <thrust/sequence.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
using namespace thrust;
template<typename Vector>
void add(const Vector& in1, const Vector& in2, Vector& out) {
transform(in1.begin(), in1.end(), in2.begin(), out.begin(),
plus<typename Vector::value_type>());
}
gpu_test.cu
#include "piston_functions.h"
#include <thrust/device_vector.h>
#include <iostream>
#include <stdio.h>
using namespace thrust;
int main(void) {
const int n = 100000000;
// allocate three device_vectors with 10 elements
device_vector<int> in1(n, 1);
device_vector<int> in2(n, 2);
device_vector<int> out(n, 0);
add(in1, in2, out);
thrust::copy(out.begin(), out.begin()+10, std::ostream_iterator<int>(std::cout,"\n"));
return 0;
}
I am probably doing something stupid or I have missed something very obvious.