My guess is that your binary decomposition will work almost as well as Kahan summation.
Here is an example to illustrate it:
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
// Two-sum step: replace (*a, *b) with their rounded float sum and the
// rounding error of that addition, so the exact value *a + *b is preserved.
// The `volatile` qualifiers force every intermediate to be rounded to
// float (defeats extended x87 precision), which is essential here.
void sumpair( float *a, float *b)
{
    volatile float s   = *a + *b;
    volatile float lo  = s - std::max(*a,*b);   // part absorbed by the larger operand
    volatile float err = std::min(*a,*b) - lo;  // rounding error of the addition
    *a = s;
    *b = err;
}
// Apply one round of pair-summing at the given stride over a[0..size).
// Recurses on the binary decomposition of the array: split at the largest
// power of two strictly below `size`, so each half is pair-summed the same
// way.  Base case: at most one pair remains at this stride.
void sumpairs( float *a,size_t size, size_t stride)
{
    if (size <= stride*2 ) {
        // At most one pair (a[0], a[stride]) fits in this range.
        if( stride<size )
            sumpair(a,a+stride);   // fixed: original used undeclared index `i`
    } else {
        // Largest power of two strictly below `size`.
        size_t half = 1;
        while(half*2 < size) half*=2;
        sumpairs( a , half , stride );
        sumpairs( a+half , size-half , stride );
    }
}
// Full pairwise summation: run pair-sum rounds with doubling strides until
// the (rounded) total of a[0..size) has accumulated into a[0]; the other
// slots hold the residues, so the exact sum of the array is unchanged.
void sumpairwise( float *a,size_t size )
{
    size_t stride = 1;
    while (stride < size) {
        sumpairs(a, size, stride);
        stride *= 2;
    }
}
// Demo driver: compares naive float summation, double-precision summation,
// and successive pairwise-with-residue refinements on 10M random floats.
// NOTE(review): the 40 MB stack array relies on the enlarged linker stack
// size mentioned below the code — confirm before reusing elsewhere.
int main()
{
float data[10000000];
size_t size= sizeof data/sizeof data[0];
// Random values centered around a large negative offset divided by a
// random denominator, producing a mix of magnitudes and signs.
for(size_t i=0;i<size;i++) data[i]=((1<<30)*-1.0+random())/(1.0+random());
// Baseline 1: naive left-to-right float accumulation.
float naive=0;
for(size_t i=0;i<size;i++) naive+=data[i];
printf("naive sum=%.8g\n",naive);
// Baseline 2: double-precision accumulation (reference-quality result).
double dprec=0;
for(size_t i=0;i<size;i++) dprec+=data[i];
printf("dble prec sum=%.8g\n",(float)dprec);
// 1st approximation: pairwise sum lands in data[0]; data[1..] holds residues.
sumpairwise( data , size );
printf("1st approx sum=%.8g\n",data[0]);
// 2nd approximation: sum the residues, then fold the correction into data[0].
sumpairwise( data+1 , size-1);
sumpairwise( data , 2 );
printf("2nd approx sum=%.8g\n",data[0]);
// 3rd approximation: one more refinement pass over the remaining residues.
sumpairwise( data+2 , size-2);
sumpairwise( data+1 , 2 );
sumpairwise( data , 2 );
printf("3rd approx sum=%.8g\n",data[0]);
return 0;
}
I declared my operands volatile and compiled with -ffloat-store to avoid extra precision on x86 architecture
g++ -ffloat-store -Wl,-stack_size,0x20000000 test_sum.c
and get: (0.03125 is 1ULP)
naive sum=-373226.25
dble prec sum=-373223.03
1st approx sum=-373223
2nd approx sum=-373223.06
3rd approx sum=-373223.06
This deserves a little explanation.
- I first display naive summation
- Then double precision summation (Kahan is roughly equivalent to that)
- The 1st approximation is the same as your binary decomposition, except that I store the sum in data[0] and take care to store the residues in the remaining slots. This way, the exact sum of the data is the same before and after the summation
- This lets me approximate the error by summing the residues in a 2nd iteration, and use that to correct the 1st iteration's result (equivalent to applying Kahan's idea on top of the binary summation)
- By iterating further I can further refine the result and we see a convergence