
Here is a simple program that reproduces the error I get:

#include <math.h>
#include <iostream>
#include <stdio.h>   // printf
#include <stdlib.h>  // rand, RAND_MAX, exit
//#include <omp.h>
//handling Not a Number exceptions:
#include <fenv.h>
#include <signal.h>
#include <unistd.h>

void handler(int sig)
{
  printf("Floating Point Exception\n");
  exit(0);
}
#define EKCOR
const float alpha=200.0/137;
const int N=4096;//4096;//8192;//16384;
const float md=940;
const float Ep=0.1f;
float E1;
int STEP=1;
struct float3
{
  float x, y, z;
};
float3 Pi;
struct Particle
{
  float x;
  float y;
  float z;
  float t;
  float vx;
  float vy;
  float vz;
  float m;
};
Particle p[N] __attribute__((aligned(64)));
inline float3 RandomDirection()
{
  float number1 = static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
  float z   = 2.0*number1 - 1.;  
  float rho = sqrtf((1.+z)*(1.-z));
  float number2 = static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
  float phi = M_PI*2.0*number2;
  float3 result={rho*cosf(phi), rho*sinf(phi), z};
  return result;
}
void function()
{
  float K=0.0;
  Pi={0.0, 0.0, 0.0};
  double Px=0.0;
  double Py=0.0;
  double Pz=0.0;
  float P3=0.0;
  float P4=0.0;
  //#1
  for(int i=0; i<N; ++i)
  {
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;
    float Penergy=0.0;
  #pragma novector
    for(int j=0; j<N; ++j) if(i!=j)
    {
      float rdist1=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
      Penergy+=alpha/rdist1;
      P4+=alpha/rdist1;
    }
    P3+=Penergy;
    float v2=p[i].vx*p[i].vx+p[i].vy*p[i].vy+p[i].vz*p[i].vz;
    K+=p[i].m*v2/2;
  }
  P4/=2;
  Pi.x=Px;
  Pi.y=Py;
  Pi.z=Pz;
  P3/=2;
  float E2=K+P3;
  float r=(E1-P3)/K;
  std::cout<<"r="<<r<<",E1="<<E1<<",P3="<<P3<<",K="<<K<<std::endl;
  float rc=sqrt(r);//when r<0 this raises FE_INVALID, which is trapped as SIGFPE
  std::cout<<"E2="<<K+P3<<",K="<<K<<",P3="<<P3<<",P4="<<P4<<",Px="<<Pi.x<<",Py="<<Pi.y<<",Pz="<<Pi.z<<std::endl;
}
void init()
{
  const double pi=3.1415926536;   
  float RADIUS=pow(50.0*N,1.0/3.0);
  Pi={0.0, 0.0, 0.0};
  double Px=0.0, Py=0.0, Pz=0.0;
#pragma omp single
  for(int i=0; i<N; ++i)
  {
    float DISTANCE=0.0f;
    if(i>0)
    {
      while(DISTANCE<=1.0f)
      {
        float theta=pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
        float phi=2*pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
        float rr=RADIUS*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));       
        p[i].x =rr*sin(theta)*cos(phi);
        p[i].y =rr*sin(theta)*sin(phi);
        p[i].z =rr*cos(theta);
        DISTANCE=10000.0f;
      #pragma simd reduction(min:DISTANCE)     
        for(int j=0; j<i; ++j)
        {
          float dij=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
          if(dij<DISTANCE) DISTANCE=dij;
        }
      }
    }
    else
    {
      float theta=pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
      float phi=2*pi*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));
      float rr=RADIUS*static_cast <float> (rand())/(static_cast <float> (RAND_MAX));       
      p[i].x =rr*sin(theta)*cos(phi);
      p[i].y =rr*sin(theta)*sin(phi);
      p[i].z =rr*cos(theta);
    }
    float modv=sqrt(2.0*Ep/md);
    float3 res=RandomDirection();
    float3 v;
    v.x=modv*res.x;
    v.y=modv*res.y;
    v.z=modv*res.z; 
    p[i].vx =v.x;
    p[i].vy =v.y;
    p[i].vz =v.z;
    p[i].m=md;
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;   
  }
  Px/=N;
  Py/=N;
  Pz/=N;
#pragma novector
  for(int i=0; i<N; ++i)
  {
    p[i].vx-=Px/p[i].m;
    p[i].vy-=Py/p[i].m;
    p[i].vz-=Pz/p[i].m;
  }
  Px=0.0, Py=0.0, Pz=0.0;
  float K1=0.0;
  float P1=0.0;
  float P2=0.0;
  //#2
#pragma novector
  for(int i=0; i<N; ++i)
  {
    Px+=p[i].vx*p[i].m;
    Py+=p[i].vy*p[i].m;
    Pz+=p[i].vz*p[i].m;
    K1+=p[i].vx*p[i].vx+p[i].vy*p[i].vy+p[i].vz*p[i].vz;
    float pp=0.0;
  #pragma novector
    for(int j=0; j<N; ++j) if(i!=j)
    {
       float rd=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
       P1+=alpha/rd;
       pp+=alpha/rd;
    }
    P2+=pp;
  }
  Pi.x=Px;
  Pi.y=Py;
  Pi.z=Pz;
  K1*=md/2;
  P1/=2;
  P2/=2;
  E1=K1+P1;
  std::cout<<"INIT Px="<<Pi.x<<" Py="<<Pi.y<<" Pz="<<Pi.z<<" K1="<<K1<<" P1="<<P1<<" P2="<<P2<<" E1="<<E1<<std::endl;
}

int
main(int argc, char **argv)
{
  //handling Not a number exception:
  feenableexcept(FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW);
  signal(SIGFPE, handler);
  //
  init();
  function();
}

For N<1024, P1=P2 and P3=P4; only at N=256 is there a small difference:

N=256 P1=3492.48 P2=3492.5 P3=3492.5 P4=3492.48

But at N>=1024 the difference keeps growing:

N=1024 P1=34968.6 P2=34969.7 P3=34969.7 P4=34968.6
N=2048 P1=114493 P2=114482 P3=114482 P4=114493
N=4096 P1=357880 P2=362032 r=-9.14142

Here the program crashes, because r=-9.14142 and sqrt(r) raises a floating point exception.

In case it matters: my OS is Fedora 23, the processor is an Intel Core i7-3770, and I used gcc 5.3.1 and the Intel C++ compiler icpc 17.0.1. Both produce the error, even without OpenMP.

The description of the problem is given below the code. My questions are:

  1. Why do P1 and P2, and P3 and P4, differ so greatly at N>=1024 (one may compile with the Intel (icpc) or gcc (g++) compiler without arguments)? The program runs in a single thread. They must have the same value!

  2. I need to write the code so that the nested for loops #1 and #2 are parallelized using

        #pragma omp parallel for reduction(+:P)
        for(int i=0; i<N; ++i)
        {
          float PP=0.0;
          for(int j=0; j<N; ++j) if(i!=j)
          {
            float r=sqrt((p[i].x-p[j].x)*(p[i].x-p[j].x)+(p[i].y-p[j].y)*(p[i].y-p[j].y)+(p[i].z-p[j].z)*(p[i].z-p[j].z));
            PP+=alpha/r;
          }
          P+=PP;
        }
        P/=2;

    and works with all optimization flags (I use -DCMAKE_CXX_FLAGS="-march=native -mtune=native -ipo16 -fp-model fast=2 -O3 -qopt-report=5 -mcmodel=large" for the Intel compiler). I can't get it to work (even with just "-O0"); maybe because of error 1), it gives me a wrong value.

  • *They must be equal!* -- [Is floating point math broken?](https://stackoverflow.com/questions/588004/is-floating-point-math-broken) – PaulMcKenzie Sep 28 '18 at 12:31
  • Once you start parallelizing the loops you can get very different rounding errors. Either drop the "They must have the same value!" expectation or don't use a number type that comes with rounding errors. – 463035818_is_not_an_ai Sep 28 '18 at 12:57
  • Is there any way you can reduce the code to a **minimal** example? – Zulan Sep 28 '18 at 13:00
  • Does using double precision affect your results? – Paul Floyd Sep 28 '18 at 13:11
  • @Paul Floyd, I have just replaced all the floats with doubles in this program and, yes, it seems to solve the problem: P1=P2=P3=P4 for all N<32768 (checked). But in my original program I need to use floats for the fastest performance. – And Sep 28 '18 at 13:21
  • @Zulan, you can delete the function() function and its body from the source code. Then the question is why P1=114493 and P2=114482 (printed at the end of init()) at N=2048. – And Sep 28 '18 at 13:26

3 Answers


You may also be interested in the "why floating point arithmetic doesn't usually do what you think it does" series on randomascii. Here is a quotation from one of its articles, exploring why computers aren't exact in floating point (math-like) computations:

Floating point math is not exact. Simple values like 0.1 cannot be precisely represented using binary floating point numbers, and the limited precision of floating point numbers means that slight changes in the order of operations or the precision of intermediates can change the result. That means that comparing two floats to see if they are equal is usually not what you want.

(...)

Here’s one example of the inexactness that can creep in:

float f = 0.1f;
float sum;
sum = 0;

for (int i = 0; i < 10; ++i)
    sum += f;
float product = f * 10;
printf("sum = %1.15f, mul = %1.15f, mul2 = %1.15f\n",
        sum, product, f * 10);

This code tries to calculate 'one' in three different ways: repeated adding, and two slight variants of multiplication. Naturally we get three different results, and only one of them is 1.0:

sum=1.000000119209290, mul=1.000000000000000,  mul2=1.000000014901161

(...)

Here are the exact values for 0.1, float(0.1), and double(0.1):

==========================================================================
| Number     | Value                                                     |
|------------|-----------------------------------------------------------|
| 0.1        | 0.1 (of course)                                           |
| float 0.1  | 0.100000001490116119384765625                             |
| double 0.1 | 0.1000000000000000055511151231257827021181583404541015625 |
==========================================================================

With that settled, let’s look at the results of the code above:

  1. sum = 1.000000119209290: this calculation starts with a rounded value and then adds it ten times with potential rounding at each add, so there is lots of room for error to creep in. The final result is not 1.0, and it is not 10 * float(0.1). However it is the next representable float above 1.0, so it is very close.
  2. mul = 1.000000000000000: this calculation starts with a rounded value and then multiplies by ten, so there are fewer opportunities for error to creep in. It turns out that the conversion from 0.1 to float(0.1) rounds up, but the multiplication by ten happens to, in this case, round down, and sometimes two rounds make a right. So we get the right answer for the wrong reasons. Or maybe it’s the wrong answer, since it isn’t actually ten times float(0.1) !
  3. mul2 = 1.000000014901161: this calculation starts with a rounded value and then does a double-precision multiply by ten, thus avoiding any subsequent rounding error. So we get a different right answer – the exact value of 10 * float(0.1) (which can be stored in a double but not in a float).

So, answer one is incorrect, but it’s only one float away. Answer two is correct (but inexact), whereas answer three is completely correct (but appears wrong).

Emphasis and markup are mine. The randomascii post even suggests some possible ways to mitigate this inexactness, but they don't eliminate it (they just move the inexactness to different parts of the floating point number line).

So, when dealing with floating point arithmetic you will never get an exact result. But there are things you can do to increase the precision of your calculation:

  1. Increase the number of significant bits in your floating point type. C++'s floats have 24 significant bits (roughly 7 significant digits) and doubles have 53 significant bits (roughly 15-16 significant digits)
  2. Reduce the number of computations involved (so 4.0*c is more precise than c+c+c+c)
  3. Try to guarantee that you will do the exact same computations in the exact same order (only then you can ==/!= the two values and get a reasonable result)

So, for example, if you change your code from floats (roughly 7 digits of precision) to doubles (roughly 16 digits of precision) you will see that your results get more accurate and show more digits. If you use parallelization, your computations may (or may not, depending on the implementation) happen in different orders on different threads/cores, producing different rounding in each intermediate result.
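As a minimal sketch of how summation order alone changes a float result (the constants here are arbitrary, chosen only to make the rounding visible):

  // g++ order_demo.cpp -o order_demo && ./order_demo
  #include <stdio.h>

  int main()
  {
      // Adding 1.0f to 1.0e8f one term at a time: each 1.0f is below the
      // resolution of a float near 1e8 (the spacing there is 8.0f), so
      // every single addition rounds away and a never changes.
      float a = 1.0e8f;
      for (int i = 0; i < 100; ++i)
          a += 1.0f;

      // Summing the small terms first lets them survive.
      float ones = 0.0f;
      for (int i = 0; i < 100; ++i)
          ones += 1.0f;           // exact: small integers are representable
      float b = 1.0e8f + ones;

      printf("a = %.1f\nb = %.1f\n", a, b);  // a = 100000000.0, b = 100000096.0
      return 0;
  }

The same set of numbers, added in a different order, gives two different totals; this is exactly what happens when a reduction is split across threads.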

As an example, here's randomascii's code using double instead of floats:

  double f = 0.1;
  double sum;
  sum = 0;

  for (int i = 0; i < 10; ++i)
      sum += f;
  double product = f * 10;
  printf("sum = %1.15f, mul = %1.15f, mul2 = %1.15f\n",
          sum, product, f * 10);

Which outputs:

  sum = 1.000000000000000, mul = 1.000000000000000, mul2 = 1.000000000000000

Which may seem correct, but when you increase the precision of the printf format from %1.15f to %1.17f:

  sum = 0.99999999999999989, mul = 1.00000000000000000, mul2 = 1.00000000000000000

Again, you can see that inexactness has crept in. sum performs ten additions while mul and mul2 each perform a single multiplication, which is why sum accumulates more imprecision than the other two.

If even 17 digits of precision aren't enough for you, then you may be interested in arbitrary-precision solutions for C++.

Definition of BigNum from wikipedia:

In computer science, arbitrary-precision arithmetic, also called bignum arithmetic, multiple-precision arithmetic, or sometimes infinite-precision arithmetic, indicates that calculations are performed on numbers whose digits of precision are limited only by the available memory of the host system.

(...)

Arbitrary precision is used in applications where the speed of arithmetic is not a limiting factor, or where precise results with very large numbers are required.

Again, emphasis mine.

Here's a related answer suggesting a BigNum library for C++:

The GNU Multiple Precision Arithmetic Library does what you want http://gmplib.org/

Here is the previous code implemented using GMP (using 64 bits of precision, roughly 19 significant digits):

 // Compile like: g++ question.cpp -o out.tmp -lgmpxx -lgmp
 #include <stdio.h>
 #include <gmpxx.h>

 int main(){
      mpf_class f("0.1", 64);
      mpf_class sum("0", 64);

      for (int i = 0; i < 10; ++i)
          sum += f;
      mpf_class product = f * 10;
      printf("sum = %1.17f, mul = %1.17f, mul2 = %1.17f\n",
             sum.get_d(), product.get_d(), ((mpf_class) (f * 10)).get_d());
 }

Which outputs:

  sum = 0.99999999999999989, mul = 0.99999999999999989, mul2 = 0.99999999999999989

Which is the result of doing the computation in 64 bits of precision and then rounding it to the 53-bit significand of C++'s double before printing.

You could, however, print the value directly from GMP:

 // Compile like: g++ question.cpp -o out.tmp -lgmpxx -lgmp
 #include <stdio.h>
 #include <gmpxx.h>
 #include <string>

 int main(){
      mpf_class f("0.1", 64);
      mpf_class sum("0", 64);

      for (int i = 0; i < 10; ++i)
          sum += f;
      mpf_class product = f * 10;
      mp_exp_t exp;   // output parameter: receives the decimal exponent
      int base = 10;
      int digits = 21;
      printf("sum = %s, mul = %s, mul2 = %s\n",
             sum.get_str(exp, base, digits).c_str(),
             product.get_str(exp, base, digits).c_str(),
             ((mpf_class) (f * 10)).get_str(exp, base, digits).c_str());
 }

Which outputs:

      sum = 1, mul = 1, mul2 = 1

This result is more precise than the double representation. You can check the GMP C++ interface here and here. Note, however, that arbitrary-precision libraries are usually much slower than built-in floats or doubles. The upside is that to increase the precision you only need to change the mpf_class variable(expression, precision); lines.
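For instance, sticking to the constructor calls already shown above, redoing the computation at 256 bits of precision (roughly 77 significant digits) only requires changing the declarations:

 mpf_class f("0.1", 256);    // same value, four times the precision
 mpf_class sum("0", 256);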

Also don't forget to check PaulMcKenzie's suggestion, [Is floating point math broken?](https://stackoverflow.com/questions/588004/is-floating-point-math-broken):

Question:

Consider the following code:

0.1 + 0.2 == 0.3 -> false

0.1 + 0.2 -> 0.30000000000000004

Why do these inaccuracies happen?

Answer:

Binary floating point math is like this. In most programming languages, it is based on the IEEE 754 standard. (...) The crux of the problem is that numbers are represented in this format as a whole number times a power of two; rational numbers (such as 0.1, which is 1/10) whose denominator is not a power of two cannot be exactly represented.

The constants 0.2 and 0.3 in your program will also be approximations to their true values. It happens that the closest double to 0.2 is larger than the rational number 0.2 but that the closest double to 0.3 is smaller than the rational number 0.3. The sum of 0.1 and 0.2 winds up being larger than the rational number 0.3 and hence disagreeing with the constant in your code.

Emphasis and markup are mine.

  • Thank you very much for the explanation! Although the problem isn't clear to me yet, I will study it later, and thank you for your guidelines. Especially useful (which I understood best of all) were the last two paragraphs and the words: "The crux of the problem is that numbers are represented in this format as a whole number times a power of two; rational numbers (such as 0.1, which is 1/10) whose denominator is not a power of two cannot be exactly represented." – And Sep 30 '18 at 10:50

You may need to do more analysis, but my first guess is that your summation loops are causing problems.

Three pointers to techniques that reduce precision loss (see the sketch after this list):

  1. Order the items by increasing size - this might be too costly if they are not already sorted.
  2. Pairwise summation
  3. Kahan summation
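
As an illustration, here is a minimal sketch of Kahan summation (option 3); the 1.0f/i terms below are a hypothetical stand-in for the alpha/rdist1 terms of the question:

// g++ -std=c++11 kahan_demo.cpp -o kahan_demo && ./kahan_demo
#include <stdio.h>

// Kahan (compensated) summation: a second float carries the low-order
// bits that a plain "sum += value" would round away.
struct Kahan
{
    float sum = 0.0f;   // running total
    float c   = 0.0f;   // compensation for lost low-order bits

    void add(float value)
    {
        float y = value - c;   // apply the previous correction
        float t = sum + y;     // low-order bits of y may be lost here
        c = (t - sum) - y;     // recover exactly what was lost
        sum = t;
    }
};

int main()
{
    Kahan acc;
    float naive = 0.0f;
    for (int i = 1; i <= 1000000; ++i) {
        float term = 1.0f / i;  // hypothetical stand-in for alpha/rdist1
        naive += term;
        acc.add(term);
    }
    // The compensated total keeps digits that the naive one loses.
    printf("naive = %.7f, kahan = %.7f\n", naive, acc.sum);
    return 0;
}

Pairwise summation (option 2) gets a similar effect by recursively adding two half-sums, which brings the error growth down from O(N) to O(log N).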
– Paul Floyd

Notice that even though, in a theoretical sense, P1 should equal P2 and P3 should equal P4, these are floating point variables; even more, they are single precision floating point variables. Depending on the order of computations, you will certainly get different results. Errors accumulate at each computation due to the inexact nature of floating point representation.

Please take a look and run the following code (tst_float.cpp):

/* g++ -Wall tst_float.cpp -o tst_float && ./tst_float */

#include <stdio.h>

int main()
{
    int ok;
    int i;
    float x;

    x = 0.0;
    for (i = 0; i < 10; ++i) {
        x += 0.1;  /* 0.1 has no exact binary representation; each add rounds */
    }

    ok = x == 1.0;

    if (ok) {
        printf("ok!\n");
    } else {
        printf("uh-uh?\n");
    }
    printf("x == %10.9f\n", x);

    return 0;
}

I get:

$ g++ -Wall tst_float.cpp -o tst_float && ./tst_float
uh-uh?
x == 1.000000119

To summarize, don't treat floating point variables as if they have the precision of integer variables.
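
A common workaround, sketched below, is to compare with a tolerance instead of ==; the helper name nearly_equal and the tolerance value are arbitrary choices for this example:

/* g++ -Wall tst_tol.cpp -o tst_tol && ./tst_tol */

#include <stdio.h>
#include <math.h>

/* Compare two floats up to a relative tolerance instead of with ==. */
static int nearly_equal(float a, float b, float rel_tol)
{
    float scale = fmaxf(fabsf(a), fabsf(b));
    return fabsf(a - b) <= rel_tol * scale;
}

int main()
{
    float x = 0.0f;
    for (int i = 0; i < 10; ++i)
        x += 0.1f;

    /* x == 1.0f is false, but x is within one part in a million of 1.0f. */
    printf("exact: %s, tolerant: %s\n",
           x == 1.0f ? "ok!" : "uh-uh?",
           nearly_equal(x, 1.0f, 1e-6f) ? "ok!" : "uh-uh?");
    return 0;
}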

  • Yes, I ran your program and received the same result. Maybe you wanted to write ok = x == 1.0; instead of ok = x == 10.0;? But what I wanted to ask is a different thing: I have now created one more nested loop at the end of main(). My question was: why, if I sum as follows: for(int i=0; i – And Sep 28 '18 at 14:40
  • My question was: why, if I sum as: float P=0.0; for(int i=0; i – And Sep 28 '18 at 14:52
  • I strongly think that the result should be the same, because the program runs in 1 thread. THIS IS WHAT I DON'T UNDERSTAND AND WANTED TO ASK TO HELP CLEAR UP. – And Sep 28 '18 at 14:52
  • If you have a sensitive sum, you can get different results whenever you compute things in a different order. In one case you reset each partial sum to zero; in the other case, you don't. So, in principle, you could have different end results. If it works in a single thread, it seems that it should also work in parallel mode, but again, when computed in parallel, the sum is not performed in the same order, so different truncations may occur. – Marcelo Roberto Jimenez Sep 28 '18 at 17:06
  • @And The bottom line is this -- no matter how much you want to make floating point computations "exact", you are fighting a losing battle and will be wasting your time. You can never guarantee a calculation comes out exact when run on a binary computing machine; that's the way it is. You either have to rewrite the formula to reduce the error propagation, and/or accept that answers will not be exact, and thus have a tolerance level (if the answer is within a certain tolerance, consider the result "equal"). – PaulMcKenzie Sep 28 '18 at 17:12
  • I only changed the types of P1 and P2 from float to double and left the other floats in place. This gave the following result: P1=36414359.505808778107 and P2=36414359.499481201172 at N=65536. At N=2048 there is P1=111538.13107203163963 and P2=111538.13442611694336. I'll think it over, but for now I suppose this will be enough for my purposes. Why does changing the type of only 3 variables lead to such a dramatic increase in accuracy? – And Sep 29 '18 at 06:07