I found some difficulties trying to implement ODEs solver routines running on GPUs using CUDA::Thurst iterators to solve a bunch of coupled first order equations in the GPU. I want to work around the approach in the former question enabling the user to write the systems of equations as human like as possible using arbitrary functors working on tuples of vectors. In details, here is a small piece of code:
#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
#include <math.h>
__host__ __device__ float f1(float x)
{
return sinf(x);
}
__host__ __device__ float f2(float x)
{
return cosf(x);
}
__host__ __device__ float Vx(float x)
{
return sinf(x);
}
struct q_dot
{
float x;
float delta;
q_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float p = thrust::get<1>(t) + delta;
return p/MASS;
}
};
struct p_dot
{
float x;
float delta;
p_dot(float _x,float _delta): x(_x),delta(_delta){};
template <typename Tuple>
__host__ __device__
float operator()(Tuple t)
{
float q = thrust::get<0>(t) + delta;
return -Vx(q);
}
};
struct euler_functor
{
unsigned fn;
float h;
float er;
float x0;
euler_functor(unsigned _fn,float _x0,float _h, float _er) : fn(_fn),h(_h),er(_er),x0(_x0) {};
template <typename Tuple>
__host__ __device__
void operator()(Tuple t) const {
// if (fn == 1) y = h*f1(y);
//else if (fn == 2) y = h*f2(y); This can be handled in this way?
q = h*p_dot(x0,h/2)(t);
p = h*p_dot(x0,h/2)(t);
float er_p,er_q;
er_p=0.5*h*p_dot(x0,h/2)(t);
er_q=0.5*h*q_dot(x0,h/2)(t);
er = er_p;
}
};
int main(void)
{
float t=0;
float t_step=0.1;
float error;
const unsigned N = 8;
// allocate three device_vectors with 10 elements
thrust::device_vector<float> Q(N),P(N);
// initilaize to some values
thrust::sequence(Q.begin(), Q.end(), 0.0f, (float)(6.283/(float)N));
// initilaize to some values
thrust::sequence(P.begin(), P.end(), 0.0f, (float)(10.283/(float)N));
// apply euler for each element of Q and P
//thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin())),
thrust::make_zip_iterator(thrust::make_tuple( Q.end(), P.end())),euler_functor(1,t,t_step,error));
// print the values
for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< std::endl;
}
But when I compile the former code I get a lot of errors. Again, I am not sure if this is the best way to do it. How can I make it work? Where are my error in it? Is there a better approach? as It is the er variable that carries information about the numerical error is returning always zero. Howto get this information? It can be use to implement some adaptive trick.