I want to properly implement an inlined device function that fills a vector of dynamic size and returns the filled vector, like this:
// Builds an array whose length is only known at run time and hands it back
// to the caller by value.
//
//   zeta, l : inputs that determine the element count, (int)(5*l + zeta).
//   returns : a vector resized to that many doubles; the fill step is still
//             a placeholder in this snippet.
//
// NOTE(review): thrust::device_vector is normally created and resized from
// host code; calling its constructor/reserve/resize from inside a __device__
// function is presumably not supported -- confirm against the Thrust docs.
__device__ inline thrust::device_vector<double> make_array(double zeta, int l)
{
    // The element count depends on l and zeta (in a complex way in the real code).
    const int count = static_cast<int>(5 * l + zeta);

    thrust::device_vector<double> values;
    values.reserve(count);  // make sure enough memory is allocated up front
    values.resize(count);   // then give the vector its final length

    // Fill step (left as a placeholder):
    // for (int i = 0; i < count; i++)
    //     ...;

    return values;
}
My goal is to use the contents of the returned vector in another device function, like this:
// Sums every element of the array produced by make_array(zeta, l).
//
//   zeta, l : forwarded unchanged to make_array.
//   returns : the sum of all elements of the returned array (0 when it is empty).
//
// NOTE(review): this relies on a thrust::device_vector being usable as a
// local object inside a __device__ function -- presumably not supported;
// confirm against the Thrust docs.
__device__ inline double use_array(double zeta, int l)
{
    thrust::device_vector<double> values = make_array(zeta, l);

    double total = 0;
    int idx = 0;
    // Walk the array front to back, accumulating one element per iteration.
    while (idx < values.size()) {
        total += values[idx];
        ++idx;
    }
    return total;
}
How can I do this properly? My feeling is that a Thrust vector is designed for this type of task, but I want to be sure I am using it correctly. What is the standard CUDA approach to this task?