As you've already discovered, the thrust vector container itself cannot be placed at file scope.
One possible solution is to simply create the vectors you need at the beginning of main
, then pass references to these to whatever functions need them.
If you really want "global behavior" you could place pointers to vectors at global/file scope, then initialize the needed vectors at the beginning of main, and set the pointers at global scope to point to the vectors created in main.
Based on the question in the comment I guess it's important/desirable that the main file be a .cpp
file compiled with the host compiler. Therefore we can use the previously mentioned concepts combined with allocation of the vectors on the heap so as to avoid deallocation until the program terminates. Here's a full example:
$ cat main.cpp
#include "transfer.h"
int main(){
float **triangles, *mesh;
triangles = new float *[1];
mesh = new float[4];
mesh[0] = 0.1f; mesh[1] = 0.2f; mesh[2] = 0.3f;
trianglesToGPU_thrust(mesh, triangles);
do_global_work(triangles);
finish();
}
$ cat transfer.h
void trianglesToGPU_thrust(const float *, float **);
void do_global_work(float **);
void finish();
$ cat transfer.cu
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "transfer.h"
#include <iostream>
#include <cstdio>
#include <thrust/copy.h>
__global__ void k(float *data, size_t ds){
for (int i = 0; i < ds; i++) printf("%f,", data[i]);
}
// thrust vectors (global)
thrust::host_vector<float> *trianglethrust_host;
thrust::device_vector<float> *trianglethrust_device;
void trianglesToGPU_thrust(const float *mesh, float** triangles) {
//create vectors
trianglethrust_host = new thrust::host_vector<float>;
trianglethrust_device = new thrust::device_vector<float>;
// fill host vector
size_t i = 0;
while (mesh[i] != 0.0f) {
(*trianglethrust_host).push_back(mesh[i++]);
}
// copy to GPU by assigning host vector to device vector, like in the Thrust documentation
*trianglethrust_device = *trianglethrust_host;
// save raw pointer
*triangles = (float*)thrust::raw_pointer_cast(&((*trianglethrust_device)[0]));
}
void do_global_work(float** triangles){
std::cout << "from device vector:" << std::endl;
thrust::copy((*trianglethrust_device).begin(), (*trianglethrust_device).end(), std::ostream_iterator<float>(std::cout, ","));
std::cout << std::endl << "from kernel:" << std::endl;
k<<<1,1>>>(*triangles, (*trianglethrust_device).size());
cudaDeviceSynchronize();
std::cout << std::endl;
}
void finish(){
if (trianglethrust_host) delete trianglethrust_host;
if (trianglethrust_device) delete trianglethrust_device;
}
$ nvcc -c transfer.cu
$ g++ -c main.cpp
$ g++ -o test main.o transfer.o -L/usr/local/cuda/lib64 -lcudart
$ ./test
from device vector:
0.1,0.2,0.3,
from kernel:
0.100000,0.200000,0.300000,
$
Here's another approach, similar to the previous, using a std::vector
of thrust containers, at global scope (only the transfer.cu
file is different from the previous example, main.cpp
and transfer.h
are the same):
$ cat transfer.cu
#include <thrust/host_vector.h>
#include <thrust/device_vector.h>
#include "transfer.h"
#include <iostream>
#include <cstdio>
#include <thrust/copy.h>
#include <vector>
__global__ void k(float *data, size_t ds){
for (int i = 0; i < ds; i++) printf("%f,", data[i]);
}
// thrust vectors (global)
std::vector<thrust::host_vector<float> > trianglethrust_host;
std::vector<thrust::device_vector<float> > trianglethrust_device;
void trianglesToGPU_thrust(const float *mesh, float** triangles) {
//create vectors
trianglethrust_host.resize(1);
trianglethrust_device.resize(1);
// fill host vector
size_t i = 0;
while (mesh[i] != 0.0f) {
trianglethrust_host[0].push_back(mesh[i++]);
}
// copy to GPU by assigning host vector to device vector, like in the Thrust documentation
trianglethrust_device[0] = trianglethrust_host[0];
// save raw pointer
*triangles = (float*)thrust::raw_pointer_cast(trianglethrust_device[0].data());
}
void do_global_work(float** triangles){
std::cout << "from device vector:" << std::endl;
thrust::copy(trianglethrust_device[0].begin(), trianglethrust_device[0].end(), std::ostream_iterator<float>(std::cout, ","));
std::cout << std::endl << "from kernel:" << std::endl;
k<<<1,1>>>(*triangles, trianglethrust_device[0].size());
cudaDeviceSynchronize();
std::cout << std::endl;
}
void finish(){
trianglethrust_host.clear();
trianglethrust_device.clear();
}
$ nvcc -c transfer.cu
$ g++ -o test main.o transfer.o -L/usr/local/cuda/lib64 -lcudart
$ ./test
from device vector:
0.1,0.2,0.3,
from kernel:
0.100000,0.200000,0.300000,
$