so you have k
sets each with ~n
vectors and each vector has m
dimensions.
For tasks like this I usually use maps of all grid aligned "directions" and compute only on them instead of using the direction present in input data
this usually significantly reduce the problem combinations count and also can be done recursively with increasing precision (near last found solution with coarser grid). I would attack your case like this:
create map of major vector "directions" O(m*(s^m))
in case you have coordinate range <0,1>
then create a map of all vector combinations with some s
steps for example 11 steps will in 2D lead to
(0.0,0),(0.0,0.1)...(1.0,0.9),(1.0,1.0)
hold them in m
dimensional array of vectors for easy access again 2D example:
vec2 dir[s][s] =
{
{ (0.0,0.0),(0.0,0.1)...(0.0,1.0) },
{ (0.1,0.0),(0.1,0.1)...(0.1,1.0) },
...
{ (1.0,0.0),(1.0,0.1)...(1.0,1.0) }
};
for each vector of dir
compute max distance to each set O(n*k*(s^m))
so simply loop through all vectors of dir
and for each compute smallest distance each set and remember only the biggest one. Hold the result in m
dimensional array dis
so:
dis[i1][i2]...[im] = max( |dir[i1][i2]...[im],set[1]| ,
|dir[i1][i1]...[im],set[2]| ,
... ,
|dir[i1][i2]...[im],set[k]| )
find min distance in dis
O(s^m)
find closest element in each set to found vector in dir
corresponding to min distance in dis
O(m*n*k*m)
this is your wanted output. this might be speed up by binary search if n
is really big to O(m*log(n)*k*m)
.
Beware the s
should be chosen big enough so a valid solution is not missed. The resulting complexity is around O(m*n*k*(s^m))
assuming m
is not too big a number. Also this will be faster only if:
O(m*(n^k)) > O(n*k*(s^m))
The constant time should be relatively similar for both approaches leading to something like:
m*(n^k) ~> n*k*(s^m)
assuming "real" case where m<k
and s>n
for example m=3,k=10,n=50,s=100
:
3*(50^10) ~> 50*10*(100^3)
292968750000000000 ~> 500000000
585937500 ~> 1
See these QAs using very similar technique (map of "directions"):
[Edit1] small C++/VCL example
//---------------------------------------------------------------------------
const int K=3; // sets
const int N=3; // max set size
const int M=2; // dimensions
float set[K][N][M]=
{
{{0.1, 0.2}, {0.3, 0.4}, {0.5, 0.6}},
{{0.5, 0.9}, {0.1, 0.3}, {0.9, 0.1}},
{{0.2, 0.2}, {0.8, 0.4}, {0.5, 0.1}}
};
//---------------------------------------------------------------------------
float distance2(float *a,float *b) // |a-b|^2
{
int m;
float x,dd;
for (dd=0.0,m=0;m<M;m++)
{
x=a[m]-b[m];
dd+=x*x;
}
return dd;
}
//---------------------------------------------------------------------------
float get_closest(int *ix) // returns min distance between sets
{ // ix[K] will hold selected closest vectors from sets
const int S=11; // grid steps
const float dx=1.0/float(S-1); // grid step
int s1,s2,k,n,m,s,ss1,ss2;
float x1,x2,d,d1,d2;
float dir[S][S][M],dis[S][S];
// create dir[][]
for (s1=0,x1=0.0;s1<S;s1++,x1+=dx)
for (s2=0,x2=0.0;s2<S;s2++,x2+=dx)
{
m=0;
dir[s1][s2][m]=x1; m++;
dir[s1][s2][m]=x2; m++;
}
// compute dis[][] and its minimum dis[ss1][ss2]
ss1=0; ss2=0;
for (s1=0;s1<S;s1++)
for (s2=0;s2<S;s2++)
{
// max distance between set[] and dir[s1][s2]
for (d2=0.0,k=0;k<K;k++)
{
// min distance between set[k][] and dir[s1][s2]
for (d1=0.0,n=0;n<N;n++)
{
d=distance2(dir[s1][s2],set[k][n]);
if ((n==0)||(d1>d)) d1=d;
}
if (d2<d1) d2=d1;
}
dis[s1][s2]=d2;
// remember smallest one
if (dis[ss1][ss2]>dis[s1][s2]){ ss1=s1; ss2=s2; }
}
// find corresponding indexes from set[][]
for (k=0;k<K;k++)
for (d1=0.0,ix[k]=0,n=0;n<N;n++)
{
d=distance2(dir[ss1][ss2],set[k][n]);
if ((n==0)||(d1>d)){ d1=d; ix[k]=n; }
}
// compute real distance
for (d1=0.0,m=0;m<M;m++)
{
for (k=0;k<K;k++)
{
d=set[k][ix[k]][m];
if (k==0){ x1=d; x2=d; }
if (x1>d) x1=d;
if (x2<d) x2=d;
}
d=x2-x1; d1+=d*d;
}
return sqrt(d1);
}
//---------------------------------------------------------------------------
__fastcall TForm1::TForm1(TComponent* Owner):TForm(Owner)
{
AnsiString s;
int k,n,m,ix[K];
mm_log->Lines->Add(get_closest(ix));
for (k=0;k<K;k++)
{
for (n=ix[k],s="",m=0;m<M;m++) s+=AnsiString().sprintf("%3.1f ",set[k][n][m]);
mm_log->Lines->Add(s);
}
}
//-------------------------------------------------------------------------
Just ignore the VCL stuff (last function that uses the get_closest
and prints its results) the important stuff is function get_closest
which returns array of indexes of closest vectors from each set. Here result:
0.141421367827692
0.1 0.2
0.1 0.3
0.2 0.2
The code expects all sets are the same length, for variable one you just need to tweak it a small bit (all the loops for n
)
In case memory is an issue Note that the arrays dir,dis
might be removed if all the iterations are moved to the same loops ...
In case you want to have this ready for any dimensionality see:
look for nested_for
you can implement the s^m
loops with it easily without any weird hacks ...