Im trying to go through two vectors, compute the difference between coordinates, compute some more force using mass etc and actualize the value of acceleration on each loop the thrust::for_each does. However, I'm unable to keep track of the acceleration value.
Using Thrust and functors, I've managed to write this:
This is the functor that causes me trouble:
struct ParticleGenerator::acc_func{
//stores the initial coordinates if i-th particle to use it for computation and the init. acc. that is 0.0f
acc_func(float Ax, float Bx, float Cx, int X, int Y, int Z) : ax(Ax), bx(Bx), cx(Cx), _x(X), _y(Y), _z(Z) {}
template <typename Tuple>
__device__ __host__
void operator()(Tuple t){
//thrust::get<0>(t) +=(int) 32;
// save some values
acc[0] = thrust::get<0>(t); //(#)
acc[1] = thrust::get<1>(t);
acc2[0] = acc[0];
acc2[1] = acc[1];
//retrieve them, OK
printf("%d_%d\n", acc[0], acc[1]); //works well
}
int getRes(){
//return saved values, not OK
printf("%d_%d_%d_%d\n", acc[0], acc[1], acc2[0], acc2[1]); //prints wrong values
return 0;
}
//this returns the correct value, though
int getRes2(){ return _x;}
int acc2[2];
private:
float ax, bx, cx;
int _x, _y, _z;
int temp;
int acc[2];
};
as you can see, I've tried both public and private, also tried using just simple int (temp) to store any value but the function getRes() when used as below never returns the correct value.
I've noticed that the part _x(X)(scroll right to see the full constructor of acc_func()) stores the value correctly and I am able to retrieve it with the function getRes().
QUESTION: Is there a way how to replicate this behavior? To use the command on line marked with //(#) and successfully save, update and later return the value?
and this is my thrust loop:
for(unsigned int i = 0; i < vecPosX.size(); ++i){
acc_func AF(0.0f, 0.0f, 0.0f, vecPosX[i], vecPosY[i], vecPosZ[i]);
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(vecPosX.begin(), vecPosY.begin())),
thrust::make_zip_iterator(thrust::make_tuple(vecPosX.end(), vecPosY.end())),
AF
);
AF.getRes();
//use the AF.getRes() to save the values of the acceleration and
//update the vecPosX[i], vecPosY[i] etc accordingly
}
where vecPosX, vecPosY are the vectors that contaion X and Y positions of the particle
the whole idea is to create a tuple(posX, posY) and on each thrust::for_each loop recompute and actualize the acceleration and when the computation is done, simply return the results of acceleration x and y so that i can update the speed and position of the i-th particle
here are the two different results:
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
0_4_-588854768_32764
267_264_267_264
254_272_254_272
256_237_256_237
264_264_264_264
259_228_259_228
259_247_259_247
264_245_264_245
265_237_265_237
272_260_272_260
I hope you can help me, thank you :)
edit: please note that I've written this whole simulation application successfully using SDL, Visual Studio and two for loops one inside each other, right now I'm trying to make it faster to be able to use more particles
edit 2: to replicate the problem, one could use
thrust::device_vector<int> vecPosX(10, 10;
thrust::device_vector<int> vecPosY(10, 10);
thrust::device_vector<int> vecPosZ(10, 10);
to create the vectors and some values
edit 3:
I apologize for my bad explanation, please bear with me. Here is a complete, simple, compilable and runnable example of expected and unexpected results:
example.cu:
#include <thrust/device_vector.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>
#include <iostream>
thrust::device_vector<int> vecPosX(5, 10);
thrust::device_vector<int> vecPosY(5, 10);
struct acc_func {
acc_func(int X, int Y) : _x(X), _y(Y) {}
template <typename Tuple>
__device__ __host__
void operator()(Tuple t){
acc2[0] = _x;
acc2[1] = _y;
acc[0] += 1;
acc[1] += 1;
}
void expected(){
printf("expected:\t%d:%d:and:%d:%d\n", 5, 5, _x, _y);
return;
}
void not_expected(){
printf("unexpected:\t%d:%d:nor:%d:%d\n\n", acc[0], acc[1], acc2[0], acc2[1]);
return;
}
public:
int acc2[2];
private:
int _x, _y;
int acc[2] = {0, 0};
};
int main(){
for(unsigned int i = 0; i < vecPosX.size(); ++i){
acc_func AF(vecPosX[i], vecPosY[i]);
thrust::for_each(
thrust::make_zip_iterator(thrust::make_tuple(vecPosX.begin(),vecPosY.begin())),
thrust::make_zip_iterator(thrust::make_tuple(vecPosX.end(), vecPosY.end())),
AF
);
AF.expected();
AF.not_expected();
}
return 0;
}
results:
$ nvcc -std=c++11 example.cu -o example
$ ./example
expected: 5:5:and:10:10
unexpected: 0:0:nor:19:0
expected: 5:5:and:10:10
unexpected: 0:0:nor:19:0
expected: 5:5:and:10:10
unexpected: 0:0:nor:19:0
expected: 5:5:and:10:10
unexpected: 0:0:nor:19:0
expected: 5:5:and:10:10
unexpected: 0:0:nor:19:0
edit 4
What I'm trying to achieve is re-write the following code:
float ax, ay, az, dx, dy, dz;
float invr, invr3, f;
for(unsigned int i = 0; i < particles.size(); i++){
ax = 0.0;
ay = 0.0;
az = 0.0;
for(unsigned int j = 0; j < particles.size(); j++){
dx = (float) (particles[j]->mPosX - particles[i]->mPosX);
dy = (float) (particles[j]->mPosY - particles[i]->mPosY);
dz = (float) (particles[j]->mPosZ - particles[i]->mPosZ);
invr = (float) 1.0/sqrt(dx*dx + dy*dy + dz*dz + 100);
invr3 = invr*invr*invr;
f = particles[j]->mass * invr3;
ax += f*dx;
ay += f*dy;
az += f*dz;
}
particles[i]->mPosX = particles[i]->mPosX + (int) dt*particles[i]->xVel + (int) 0.5*dt*dt*ax;
particles[i]->mPosY = particles[i]->mPosY + (int) dt*particles[i]->yVel + (int) 0.5*dt*dt*ay;
particles[i]->mPosZ = particles[i]->mPosZ + (int) dt*particles[i]->zVel + (int) 0.5*dt*dt*az;
particles[i]->xVel += dt*ax;
particles[i]->yVel += dt*ay;
particles[i]->zVel += dt*az;
}
My intention was to keep the outer loop pretty much as it is, compute the ax, ay, az using thrust (because it's going over a bunch of elements in a vector) and update the vectors with the result the of thrust for_each.
How can I safely count the ax, ay and az and return it?
the particles you see is
std::vector<Particle *> particles;
and Particle is a class that has member variables
int mPosX, mPoY, mPosZ;
float xVel, yVel, zVel;
int mass;
and dt is:
const float dt = 0.1f;
So instead of a class that contains ints, and floats I've created vectors of ints and floats so that i-th element of each vector (vector of mass, speed, position) corresponds to information about one specific particle