0

Results

The above results are the |X|Y|Z|AbsDistance of each sphere intersection, random spooky values appear probably because of a newbie mistake, but I really can't get it.

To be as specific as I can:

The following snippet is supposed to calculate the intersection point between a ray and a spherical boundary with a predefined radius and the origin as the center.

To give more context:

1- The RandomWalk starts from the origin and moves with a randomly generated _step and _direction.

2- After each step, the ray is checked for hitting possibility by comparing the absolute distance to the radius of the boundary.

3- getIntersectionPoint() returns the point of intersection, but as the number of points or number of steps increases, the probability of outcasts increases, messing up the whole thing.

Here's what I've done:

#include <curand.h>
#include <curand_kernel.h>
#include <iostream>

#define N 256                           // Number of photons
#define THREADS_PER_BLOCK 256           // Threads per Block
#define BOUNDARY_RADIUS 5.0

class Point{
private:
    float _x;
    float _y;
    float _z;
public:
    __device__ __host__ Point(float x, float y, float z){
        setCoordinates(x, y, z);
    }

    __device__ __host__  Point(){
        setCoordinates(0.f, 0.f, 0.f);
    }

    __device__ __host__
 void setCoordinates(float x, float y, float z)
{
    this->_x = x;
    this->_y = y;
    this->_z = z;
}
__device__ __host__  float getX() const { return this->_x; }

__device__ __host__   float getY() const { return this->_y; }

__device__ __host__   float getZ() const { return this->_z; }

__device__ __host__  
    Point add(Point point){
        float result_x = this->_x + point.getX();
        float result_y = this->_y + point.getY();
        float result_z = this->_z + point.getZ();
        return Point( result_x, result_y, result_z );
    }

    __device__ __host__  
    Point subtract(Point point){
        float result_x = this->_x - point.getX();
        float result_y = this->_y - point.getY();
        float result_z = this->_z - point.getZ();
        return Point( result_x, result_y, result_z );
    }
};

class RNG{
private:
__device__  float generate( curandState* globalState, int i) 
{
    curandState localState = globalState[i];
    float random = curand_uniform( &localState );
    globalState[i] = localState;
    return random;
}
public:
__device__   float getRandomStep( curandState* globalState , int i) { 
    float step = 0.f;       // Intialize for step value
    step = generate (globalState, i);
    return step;
 } 

__device__  Point getRandomPoint( curandState* globalState , int i)
{

    float u = generate (globalState , i);
    float v = generate (globalState, i);

    float theta = 2 * M_PI * u;
    float phi = acos(1 - 2 * v);

    // Transforming into the cartesian space
    float x = sin(phi) * cos(theta);
    float y = sin(phi) * sin(theta);
    float z = cos(phi);

    return Point(x,y,z);
}
};

class Ray{
private:
    Point _prevPos;
    Point _currentPos;
    Point _direction;
    float _step;
public:
    __device__ Ray(Point startingPoint, Point direction){
        this->_currentPos.setCoordinates(startingPoint.getX(), startingPoint.getY(), startingPoint.getZ());
        this->_direction.setCoordinates(direction.getX(), direction.getY(), direction.getZ());
    }

    __device__ void setDirection(Point direction) { this->_direction.setCoordinates(direction.getX(), direction.getY(), direction.getZ()); }

    __device__ void setStep(float step) { this->_step = step; }

    __device__ Point getCurrentPos() const { return this->_currentPos; }

    __device__ Point getDirection() const { return this->_direction; }

    __device__ Point getPrevPos() const { return this->_prevPos; }

    __device__ float getStep() const { return this->_step; }

    __device__ void move(Point direction, float step) // The point moves in the specified direction with the given step
    {
        this->_prevPos = this->_currentPos;
        this->_direction = direction;
        this->_step = step;
        float newX = this->_currentPos.getX() + (direction.getX() * step);
        float newY = this->_currentPos.getY() + (direction.getY() * step);
        float newZ = this->_currentPos.getZ() + (direction.getZ() * step);
        this->_currentPos.setCoordinates(newX, newY, newZ);
    }
};

class Boundary{
private:
    float _radius;
    Point _center;

    __device__
    float dot(Point point1, Point point2){return point1.getX()*point2.getX() + point1.getY()*point2.getY() + point1.getZ()*point2.getZ();}

public:
    __device__ __host__ Boundary(float r, Point c){
        _radius = r;
        _center = c;
    }

    __device__ bool isCrossed(Ray ray){
        float absDistance = (float) sqrtf((float) powf(ray.getCurrentPos().getX(),2)
                            + (float) powf(ray.getCurrentPos().getY(),2) 
                            + (float) powf(ray.getCurrentPos().getZ(),2));
        if(absDistance >= _radius){
            return true;
        } else {
            return false;
        }
    };


    __device__ Point getIntersectionPoint(Ray ray){
            Point A = ray.getPrevPos();
            Point B = ray.getDirection();
            Point S = A.add(_center);
            Point A_C = A.subtract(_center);
            float a = dot(B, B);
            float b = 2.0 * dot(B, A_C);
            float c = dot(A_C, A_C) - _radius*_radius;
            float discriminant = b*b - 4*a*c;
            float t1 = (-b + sqrtf(discriminant)) / (2.0*a);
            float t2 = (-b - sqrtf(discriminant)) / (2.0*a);
            float t;

            if(t1 < 0){
                t = t2;
            } else {
                t = t1;
            }

            return Point((A.getX()+B.getX()*t),(A.getY()+B.getY()*t),(A.getZ()+B.getZ()*t));
    }
};



/**
 * @brief randomWalk
 * keeps wandering around with the photon in the 3D space
 * @return The Point where the Photon hits the Boundary
 */
 __device__ Point randomWalk(curandState_t *states, int idx, Boundary boundary, RNG rng)
 {
     Ray ray = Ray(Point(0.f, 0.f, 0.f), Point(0.f, 0.f, 0.f));

     while (!boundary.isCrossed(ray))
     {
         ray.move(rng.getRandomPoint(states, idx), rng.getRandomStep(states, idx));
     }
     return boundary.getIntersectionPoint(ray);
 }





void streamOut(Point* _cpuPoints);

__global__ void finalPosition(unsigned int seed, curandState_t* states, Point* _gpuPoints,Boundary boundary,RNG rng) {
    int idx = blockIdx.x*blockDim.x+threadIdx.x;
    curand_init(seed, idx, 0, &states[idx]);
    Point finalPos;
    finalPos = randomWalk(states, idx, boundary, rng);
    _gpuPoints[idx] = finalPos;
}

  int main() {

    int nBlocks = N/THREADS_PER_BLOCK + 1;

    curandState_t* states;
    cudaMalloc((void**) &states, N * sizeof(curandState_t));

// Allocate host memory for final positions
    Point * _cpuPoints= (Point*)malloc(sizeof(Point) * N);

// Allocate device  memory for final positions
    Point* _gpuPoints = nullptr;
    cudaMalloc((void**) &_gpuPoints, N * sizeof(Point));

// Initializing the Boundary and the Random Number Generator
    Boundary boundary = Boundary(BOUNDARY_RADIUS, Point(0.f, 0.f, 0.f));
    RNG rng;

// Call Kernel
    finalPosition<<<nBlocks,THREADS_PER_BLOCK>>>(time(0), states , _gpuPoints, boundary, rng);

// Copy device data to host memory to stream them out
    cudaMemcpy(_cpuPoints, _gpuPoints, N* sizeof( Point), cudaMemcpyDeviceToHost);


    streamOut (&_cpuPoints[0]);

    free(_cpuPoints);
    cudaFree(_gpuPoints);


    return 0;

}

void streamOut(Point* _cpuPoints)  
{
    FILE *output;
    output = fopen("output.csv", "w");

    for (int i = 0; i < N; i++)
    {
        // Streaming out my output in a log file
        float absDistance = (float) sqrtf((float) powf(_cpuPoints[i].getX(), 2) 
                            + (float) powf(_cpuPoints[i].getY(), 2) 
                            + (float) powf(_cpuPoints[i].getZ(), 2));
        fprintf(output, "%f,%f,%f,%f\n", _cpuPoints[i].getX(), _cpuPoints[i].getY(), _cpuPoints[i].getZ(), absDistance);
    }
}

  • 1
    snippets are not the right approach for a question like this. You are supposed to provide a [mcve], which is a complete code that someone else can compile and run and see the output. See item 1 [here](https://stackoverflow.com/help/on-topic). – Robert Crovella Nov 23 '19 at 21:51
  • Okay, thanks, I will do that – Mostafa Ahmed Asaad Nov 23 '19 at 22:18
  • I guess this is as minimal as it gets @RobertCrovella – Mostafa Ahmed Asaad Nov 23 '19 at 22:47
  • The code you have posted won't compile. There are many issues, so I'll pick just one. You've provided no constructor definition for `Point`. If you want help, my suggestion is to post a compilable code. There's no need to guess at it. Start over with a new project, add only the code you have posted, and see if you can compile it. If you can't keep updating it till it compiles and produces the problematic output. Then post that. Because there's a good chance this is the process others will follow who may try to help you. They will very likely want to compile and run the code. – Robert Crovella Nov 23 '19 at 23:10
  • I will iterate on it – Mostafa Ahmed Asaad Nov 23 '19 at 23:14
  • @RobertCrovella Thanks again for your patience with me, I believe it's compilable now – Mostafa Ahmed Asaad Nov 23 '19 at 23:46

1 Answers1

0

Any time you are having trouble with a CUDA code, I recommend using proper CUDA error checking and run your code with cuda-memcheck. When I run your code with cuda-memcheck, I get a variety of errors. This means your kernel code is making illegal, out-of-bounds accesses. You can start to track this down using the method described here.

One problem in your code is that you are launching more blocks/threads than what your allocation size N dictates:

int nBlocks = N/THREADS_PER_BLOCK + 1;

This means some of the threads in your kernel launch will make out-of-bounds accesses. You need to address this with a thread check (if statement) in your kernel code.

When I take your code as posted, and modify the kernel like this:

__global__ void finalPosition(unsigned int seed, curandState_t* states, Point* _gpuPoints,Boundary boundary,RNG rng, int n) {
    int idx = blockIdx.x*blockDim.x+threadIdx.x;
    if (idx < n){
      curand_init(seed, idx, 0, &states[idx]);
      Point finalPos;
      finalPos = randomWalk(states, idx, boundary, rng);
      _gpuPoints[idx] = finalPos;}
}

and the kernel launch like this:

finalPosition<<<nBlocks,THREADS_PER_BLOCK>>>(time(0), states , _gpuPoints, boundary, rng, N);

I get this result (output.csv):

$ cat output.csv
0.628292,-4.899494,0.774730,5.000000
0.162323,-4.930647,-0.813861,5.000000
-1.715985,-0.534316,-4.665823,5.000000
-2.411644,-3.632435,-2.447323,5.000000
-3.418264,-0.851781,3.548231,5.000000
-2.850476,-2.937130,-2.871943,5.000000
0.072410,3.170733,-3.865386,5.000000
1.959057,-0.443189,-4.578829,5.000000
2.031133,-3.467616,-2.974919,5.000000
-2.107327,3.904619,2.305021,5.000000
-4.639953,1.667007,-0.831821,5.000000
3.720370,1.624804,2.918708,5.000000
-1.534095,-3.247724,3.478339,5.000000
-3.888582,0.315719,-3.127179,5.000000
-0.054493,-4.998784,-0.095864,5.000000
3.298623,3.518482,1.318854,5.000000
4.641367,-1.859068,-0.039635,5.000000
-3.671611,-0.072624,3.393228,5.000000
-1.256829,-0.310876,4.829465,5.000000
-2.492307,-4.182354,1.138562,5.000000
1.395312,-2.987793,3.758483,5.000000
-2.762215,-3.152503,2.726151,5.000000
3.101520,-1.983825,-3.383048,5.000000
-2.169484,3.941614,2.181059,5.000000
-3.971401,-1.138357,-2.816402,5.000000
-2.118435,-1.203381,4.366246,5.000000
3.319744,-3.698802,-0.546043,5.000000
2.737933,3.012805,-2.902883,5.000000
-2.870568,-0.945093,3.983295,5.000000
3.576528,-2.390892,2.547957,5.000000
4.602388,0.700673,1.824028,5.000000
0.122336,4.979045,-0.440617,5.000000
-0.935764,-1.534525,4.665789,5.000000
3.667711,-3.357755,0.522854,5.000000
-1.289282,1.290344,4.655402,5.000000
-3.764930,-3.280344,-0.254253,5.000000
4.267314,-0.811147,2.476302,5.000000
-3.693138,3.297244,-0.699224,5.000000
-1.038960,-2.293650,-4.319691,5.000000
-4.245689,0.974306,2.454558,5.000000
-3.710622,2.254789,2.479358,5.000000
-0.739412,-2.375453,-4.337107,5.000000
-1.122346,0.997810,4.769142,5.000000
4.641891,-1.289307,-1.338109,5.000000
3.943014,-2.680164,-1.506439,5.000000
-1.657783,0.458186,-4.694871,5.000000
2.903168,-3.962222,0.934030,5.000000
1.922109,4.382765,-1.448057,5.000000
2.943883,4.041326,0.035208,5.000000
3.264783,1.974566,-3.231452,5.000000
-3.273946,-2.057536,-3.169830,5.000000
0.055952,-3.367576,3.695442,5.000000
-0.072741,-4.568989,-2.029546,5.000000
-0.157276,4.870314,-1.120405,5.000000
1.299422,0.099700,4.827169,5.000000
2.791323,2.083337,3.587231,5.000000
-0.769589,2.674135,-4.154123,5.000000
-0.424974,2.674058,4.203428,5.000000
-1.297806,1.828922,4.468864,5.000000
1.356144,3.977489,-2.709327,5.000000
-4.020390,1.910192,2.277636,5.000000
0.859541,-4.891906,-0.574843,5.000000
0.760309,4.836938,-1.012894,5.000000
-4.918316,0.898741,-0.049279,5.000000
2.159176,-0.357519,4.495569,5.000000
1.337239,3.632694,-3.164700,5.000000
1.287019,3.640088,3.177001,5.000000
4.175551,-2.552966,1.023299,5.000000
-4.189130,-2.710545,0.322699,5.000000
-3.775866,0.422600,3.250269,5.000000
1.227863,1.939098,-4.442100,5.000000
-0.910808,-0.769251,-4.855789,5.000000
-2.836509,-4.018154,0.899253,5.000000
-0.943431,-4.248322,-2.462051,5.000000
4.839777,-0.542668,1.132283,5.000000
-0.543598,-4.860043,1.041386,5.000000
2.096293,0.731096,4.480072,5.000000
1.515222,-4.503112,1.557589,5.000000
0.391035,-2.820461,-4.109999,5.000000
-4.697918,1.659874,-0.417592,5.000000
0.731389,4.766176,1.322361,5.000000
-4.971092,0.391872,0.366991,5.000000
4.683945,1.503105,-0.895171,5.000000
0.094646,0.803327,4.934137,5.000000
-4.756599,-1.063119,1.115591,5.000000
-4.741367,0.601832,1.468751,5.000000
-0.622062,-4.399431,-2.293043,5.000000
3.998584,-2.430138,-1.762316,5.000000
2.889354,3.753414,-1.601098,5.000000
4.619578,-1.843518,0.510819,5.000000
-3.468601,-3.576452,-0.421662,5.000000
-2.446475,-3.452250,-2.663969,5.000000
0.611008,4.935348,-0.518658,5.000000
3.356182,3.689818,-0.348258,5.000000
2.723260,-4.022014,-1.186279,5.000000
2.515270,-3.615386,-2.366938,5.000000
1.461690,-4.704300,0.856170,5.000000
-1.220645,3.493145,3.362731,5.000000
-3.669620,3.239435,-1.019782,5.000000
-3.316329,1.182409,3.550193,5.000000
4.916836,-0.796389,0.436450,5.000000
-0.622584,-1.670654,4.671328,5.000000
-1.539724,3.515036,-3.205272,5.000000
-2.272659,3.932659,-2.090267,5.000000
1.659590,0.629000,-4.674411,5.000000
2.067366,-4.000756,2.172545,5.000000
-3.875296,1.900115,-2.524211,5.000000
3.605831,-3.310765,1.018244,5.000000
-0.772092,-4.551371,-1.920650,5.000000
2.968601,-4.023230,-0.032172,5.000000
-1.503622,-3.879141,2.773334,5.000000
-1.722315,-1.940946,-4.273916,5.000000
1.193075,3.128174,3.713637,5.000000
4.582112,1.741668,-0.985310,5.000000
-1.585273,-3.350112,-3.356137,5.000000
3.985136,-2.446971,-1.769469,5.000000
3.462019,2.040801,-2.974820,5.000000
2.336477,1.321345,4.218403,5.000000
-1.968305,4.097646,-2.082083,5.000000
3.373862,1.969776,3.120422,5.000000
-4.997004,0.112979,-0.131096,5.000000
-3.184446,1.498715,-3.551501,5.000000
-4.962571,0.586419,0.170300,5.000000
-2.533729,3.452926,-2.580216,5.000000
-0.292847,4.670508,1.760851,5.000000
4.836363,1.059029,0.698605,5.000000
2.820885,-4.074259,-0.665598,5.000000
-2.115496,4.106643,1.913154,5.000000
1.624954,3.679764,2.969656,5.000000
4.967940,-0.505954,-0.252150,5.000000
-4.672419,-1.567572,-0.843337,5.000000
3.070334,2.869947,-2.708589,5.000000
2.897243,3.452626,2.164568,5.000000
-3.926629,-1.834329,-2.493356,5.000000
1.167627,-3.817905,3.010024,5.000000
3.711214,2.119984,2.594717,5.000000
2.891797,0.411721,-4.058078,5.000000
-4.938633,-0.489490,0.608526,5.000000
2.090108,3.137994,3.283968,5.000000
-4.941360,0.246557,-0.722615,5.000000
3.025169,3.877938,-0.899971,5.000000
-0.057637,-1.374093,4.807135,5.000000
0.834437,4.757398,-1.292628,5.000000
-2.652762,1.304395,4.032544,5.000000
-2.801193,2.044116,3.602070,5.000000
3.026658,-3.871065,0.924228,5.000000
4.370097,2.405023,0.343677,5.000000
2.026850,-2.908674,-3.525833,5.000000
0.569686,-4.203772,-2.646461,5.000000
-4.491343,-1.708262,1.381909,5.000000
-2.891552,-2.594137,-3.147916,5.000000
4.539808,1.979339,-0.687284,5.000000
3.895631,-2.867161,1.266274,5.000000
-4.846499,-0.577102,-1.085540,5.000000
3.875701,-1.328161,-2.866169,5.000000
-3.538635,-1.043489,3.374788,5.000000
-0.142181,-4.997194,0.088522,5.000000
-1.737878,2.866842,-3.709582,5.000000
2.625108,-2.538683,3.415245,5.000000
-1.590130,-2.351951,4.115800,5.000000
-2.037973,-3.422420,3.022202,5.000000
1.821700,-3.040913,3.526224,5.000000
0.371202,4.985830,-0.060914,5.000000
4.683237,-0.619066,-1.638306,5.000000
2.398519,-4.361123,-0.477192,5.000000
3.776791,1.066108,3.098268,5.000000
-1.047291,2.938777,-3.907271,5.000000
-3.603310,1.989184,2.838891,5.000000
1.356899,3.938558,-2.765246,5.000000
4.458138,1.741543,1.446385,5.000000
-1.534129,2.878480,3.789565,5.000000
0.197157,-1.983445,-4.585529,5.000000
1.337347,4.006208,-2.676154,5.000000
1.458471,-3.795124,2.910309,5.000000
-0.761919,4.939224,0.153434,5.000000
4.058095,2.718427,1.068650,5.000000
-4.893477,-0.106711,1.021024,5.000000
-0.265158,4.754215,1.525495,5.000000
3.515460,3.011975,1.889325,5.000000
3.462154,-0.533415,3.567767,5.000000
-1.368027,3.607505,3.180316,5.000000
-3.604319,3.357449,0.858149,5.000000
-4.472452,1.961971,1.071376,5.000000
0.718252,-1.359473,-4.757725,5.000000
2.046570,4.513433,-0.663678,5.000000
-0.944693,1.298234,4.735203,5.000000
4.330425,-0.872982,-2.342077,5.000000
-3.978647,2.122892,2.159560,5.000000
0.277452,-0.264253,4.985297,5.000000
4.141907,-1.149088,2.554252,5.000000
-0.233733,-4.282661,2.569860,5.000000
-1.016823,1.102844,-4.769676,5.000000
2.170278,0.935831,4.406145,5.000000
-0.687388,0.576073,-4.918906,5.000000
-2.245424,-0.350784,4.453653,5.000000
1.744440,-2.652875,-3.862536,5.000000
-2.825023,3.965294,-1.138282,5.000000
4.314280,0.748011,-2.414015,5.000000
3.495596,-3.334958,1.287970,5.000000
1.075361,3.787970,3.081377,5.000000
-1.785661,-2.453979,-3.973588,5.000000
2.323098,4.402976,0.465858,5.000000
0.018438,-4.230187,-2.665554,5.000000
4.348341,-1.989394,1.460906,5.000000
-1.429905,4.039118,-2.576994,5.000000
-1.250704,0.451433,-4.819953,5.000000
0.167199,-2.760357,4.165630,5.000000
-2.622747,4.252591,-0.191487,5.000000
-4.118810,1.311523,2.513029,5.000000
0.877228,-4.601123,1.749324,5.000000
2.018730,-2.815201,-3.605464,5.000000
-1.223365,-4.307477,2.224640,5.000000
-1.950934,4.297880,1.649874,5.000000
-0.131930,-0.339385,4.986723,5.000000
1.781163,4.312893,1.796221,5.000000
1.854860,-4.261953,1.842621,5.000000
4.920663,0.883414,-0.081617,5.000000
-3.450862,-3.552723,-0.685352,5.000000
0.141668,-3.981927,-3.020627,5.000000
-0.710307,1.609614,-4.680235,5.000000
-0.913173,2.112117,4.439040,5.000000
3.299812,-3.403087,1.590674,5.000000
-4.896513,0.458815,-0.902026,5.000000
-4.692033,-1.111317,-1.322802,5.000000
1.252288,-1.454753,4.616868,5.000000
-4.081772,-2.074811,-2.008556,5.000000
-1.759481,-0.117314,4.678725,5.000000
4.809183,-0.342558,-1.324544,5.000000
0.667401,2.304548,4.386756,5.000000
0.739023,-3.974324,-2.942548,5.000000
-1.563058,4.086929,-2.419476,5.000000
1.173510,2.061344,4.401560,5.000000
0.668464,4.852658,-1.002429,5.000000
-4.419933,2.054977,1.114117,5.000000
-1.110164,-4.821075,-0.724410,5.000000
0.445619,0.343362,-4.968252,5.000000
3.654497,2.173541,2.630659,5.000000
-0.369989,0.458368,-4.965179,5.000000
2.885627,0.763048,4.011348,5.000000
-0.180556,4.945003,0.717179,5.000000
0.337894,1.747535,4.672467,5.000000
-1.756681,4.395498,1.610487,5.000000
3.864833,-2.428329,-2.041148,5.000000
-3.238250,2.699637,-2.688065,5.000000
3.534358,-3.506072,0.464514,5.000000
-4.732568,1.448340,0.710711,5.000000
-2.408191,1.378415,-4.159398,5.000000
-4.525957,-1.632861,-1.359957,5.000000
3.973403,2.903845,-0.883036,5.000000
-4.613844,1.607693,1.061963,5.000000
0.118624,-3.210011,3.831678,5.000000
4.347818,2.467214,-0.096607,5.000000
-3.273662,-3.779224,0.024393,5.000000
-1.548274,-1.125134,4.619190,5.000000
2.784302,-1.157474,-3.988473,5.000000
-3.413198,-3.552125,-0.855856,5.000000

which does not appear to have "spooky" values in it. (And the cuda-memcheck errors disappear.) If there are still "spooky" values, then you're going to need to provide a clearer definition of the results you expect.

Robert Crovella
  • 143,785
  • 11
  • 213
  • 257