I understand that it is not possible to represent all numbers to arbitrary precision with a finite number of bits, and that naively comparing floating-point numbers for equality is inadvisable. But I would have expected that if I were adding many numbers together, the **order** in which I add them would not matter.
To test this prediction, I fill a vector with random numbers and compute their sum, then sort the vector and compute the sum again. Very often, the two sums don't match! Is this a bug in my code (included below), a shortcoming of floating-point arithmetic in general, or an issue that might be resolved by switching compilers, etc.?
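Here is a minimal standalone example of the kind of effect I think I am seeing: with IEEE 754 doubles, regrouping the same three addends changes the result (the literal values are chosen purely for illustration).

#include <cstdio>

int main()
{
    // the same three values, summed with two different groupings
    double a = 0.1, b = 0.2, c = 0.3;
    double left  = (a + b) + c;  // typically 0.60000000000000009 with IEEE 754 doubles
    double right = a + (b + c);  // typically 0.59999999999999998
    printf("left  = %.17f\nright = %.17f\nequal? %d\n", left, right, left == right);
    return 0;
}

And here is my full test program: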
#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <ctime>
#include <random>
#include <vector>
double check_sum_depends_on_order(int seed)
{
    // fill a vector with random numbers
    std::vector<long double> v;
    std::uniform_real_distribution<long double> unif(-1.L, 1.L);
    std::mt19937 rng(seed);
    for (size_t i = 0; i < 1000; ++i)
    {
        v.push_back(unif(rng));
    }
    // copy this vector and then sort the copy
    std::vector<long double> v2 = v;
    std::sort(v2.begin(), v2.end());
    // tot is the running total for vector v, unsorted
    // tot2 is the running total for vector v2, sorted
    long double tot = 0.0L, tot2 = 0.0L;
    for (size_t i = 0; i < v.size(); ++i)
    {
        tot += v[i];
        tot2 += v2[i];
    }
    // display both sums; comment these lines out to silence verbose output
    printf("v tot\t= %.64Lf\n", tot);
    printf("v2 tot\t= %.64Lf\n", tot2);
    printf("Do the sums match (0/1)? %d\n\n", tot == tot2);
    // return 1.0 if the sums match, and 0.0 if they do not
    return double(tot == tot2);
}
int main()
{
    // number of trials
    size_t N = 1000;
    // running count of trials in which the two sums match
    double match = 0.;
    for (size_t i = 0; i < N; ++i)
    {
        // seed for random number generation
        int seed = time(NULL) * i;
        match += check_sum_depends_on_order(seed);
    }
    printf("%f percent of random samples have matching sums after sorting.\n",
           match / double(N) * 100.);
    return 0;
}
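For what it's worth, my understanding is that compensated (Kahan) summation reduces this order dependence, though I don't know whether it would make my two sums match in every trial. Below is a minimal sketch of what I mean; kahan_sum is my own illustrative helper, not a standard library function, and I understand that aggressive optimization flags such as -ffast-math can optimize away the compensation step.

#include <vector>

// Sketch of Kahan (compensated) summation over a vector of long doubles.
long double kahan_sum(const std::vector<long double>& v)
{
    long double sum = 0.0L; // running total
    long double c = 0.0L;   // compensation for lost low-order bits
    for (long double x : v)
    {
        long double y = x - c;   // apply the compensation carried so far
        long double t = sum + y; // low-order bits of y may be lost in this add
        c = (t - sum) - y;       // algebraically zero; numerically, the lost bits
        sum = t;
    }
    return sum;
}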