I am trying to understand up to which point I can add or subtract floating point numbers that are very far apart in magnitude (e.g. 3.34e-20 + 1.22) and still get a correct answer. I ran this code:
#include <iostream>
#include <cfloat>
// Adds a very small value (3.34e-20) to a much larger one (1.22), first in
// float and then in long double, and prints every operand and result with
// 30 significant digits so the rounding of each step is visible.
int main(){
    std::cout.precision(30);

    // --- float ---------------------------------------------------------
    // The literals below have type double and are rounded to float when the
    // variables are initialised (1.22 becomes 1.2200000286102294921875).
    const float small_f = 3.34e-20;
    const float large_f = 1.22;
    // Evaluated in float: the result has to be rounded back to a float, and
    // small_f is far below the spacing between adjacent floats around 1.22.
    const float sum_f = small_f + large_f;

    std::cout<<"float: Minimum normalized value: "<< FLT_MIN <<'\n';
    std::cout<<"float: small = "<< small_f <<'\n';
    std::cout<<"float: large = "<< large_f <<'\n';
    std::cout<<"float: sum = "<< small_f << "+" << large_f << " = " << sum_f <<'\n';
    // 2.0 and 1.22 are double literals, so sum_f is promoted and the whole
    // expression is evaluated in double. Here 1.22 is NOT rounded to float,
    // so the difference shows the float rounding error of 1.22, not small_f.
    std::cout<<"float: (sum*2.0 - 1.22*2.0)/2.0 = "<< (sum_f*2.0-1.22*2.0)/2.0 <<'\n';
    // Both operands come from float variables, so the rounding error of 1.22
    // cancels out and only what the sum kept of small_f could remain.
    std::cout<<"float: (sum*2.0 - large*2.0)/2.0 = "<< (sum_f*2.0-large_f*2.0)/2.0 <<'\n';
    std::cout<<'\n';

    // --- long double ---------------------------------------------------
    // The literals carry no L suffix, so they are double values that are
    // then widened: the variables hold 3.34e-20 and 1.22 rounded to double.
    const long double small_ld = 3.34e-20;
    const long double large_ld = 1.22;
    const long double sum_ld = small_ld + large_ld;

    std::cout<<"long double: Minimum normalized value: "<< LDBL_MIN <<'\n';
    std::cout<<"long double: small = "<< small_ld <<'\n';
    std::cout<<"long double: large = "<< large_ld <<'\n';
    std::cout<<"long double: sum = "<< small_ld << "+" << large_ld << " = " << sum_ld <<'\n';
    // 1.22*2.0 is computed in double and then converted to long double for
    // the subtraction; large_ld already holds the double value of 1.22.
    std::cout<<"long double: (sum*2.0 - 1.22*2.0)/2.0 = "<< (sum_ld*2.0-1.22*2.0)/2.0 <<'\n';
    std::cout<<"long double: (sum*2.0 - large*2.0)/2.0 = "<< (sum_ld*2.0-large_ld*2.0)/2.0 <<'\n';

    // Returning 0 from main reports success; unlike the EXIT_SUCCESS macro it
    // does not depend on <cstdlib>, which this file does not include.
    return 0;
}
Output:
float: Minimum normalized value: 1.17549435082228750796873653722e-38
float: small = 3.3399998940068452799152744128e-20
float: large = 1.2200000286102294921875
float: sum = 3.3399998940068452799152744128e-20+1.2200000286102294921875 = 1.2200000286102294921875
float: (sum*2.0 - 1.22*2.0)/2.0 = 2.86102295188328525910037569702e-08
float: (sum*2.0 - large*2.0)/2.0 = 0
long double: Minimum normalized value: 3.36210314311209350626267781732e-4932
long double: small = 3.3400000000000001388033392343e-20
long double: large = 1.219999999999999973354647409
long double: sum = 3.3400000000000001388033392343e-20+1.219999999999999973354647409 = 1.219999999999999973354647409
long double: (sum*2.0 - 1.22*2.0)/2.0 = 0
long double: (sum*2.0 - large*2.0)/2.0 = 0
I don't understand why I can't obtain the correct answer, even for the float type, since every value involved in the computations is larger than the minimum normalized value (~1e-38).