9

I need to calculate euclidean distance between two points in the fastest way possible. In C.

My code is this and seems a little bit slow:

float distance(int py, int px, int jy, int jx){
     return sqrtf((float)((px)*(px)+(py)*(py)));
}

Thanks in advance.

EDIT:

I'm sorry I hadn't been clear. I'd better specify the context: I'm working with images and I need the euclidean distance from each pixel to all the other pixels. So I have to calculate it a lot of times. I can't use the square of the distance. I'll add a little bit more code to be more clear:

for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else{
                num+=rfun(imgI[py][px].red-imgI[jy][jx].red)/distance(py, px, jy, jx);
                den+=RMAX/distance(py, px, jy, jx);
            }
        }
    }


 float distance(int py, int px, int jy, int jx){
     return sqrtf((float)((px-jx)*(px-jx)+(py-jy)*(py-jy)));
}

That's what I have t do. And I have to do it with all the pixels (px, py)

EDIT2: I'm sorry i wasn't clear but I tried to keep the question as general as I could. I'm writing a program to process images with an algorithm. The big problem is the time because I have to do it really really fast. Now what I need to optimize is this function: `float normC(int py, int px, int color, pixel** imgI, int sizeY, int sizeX){

int jx, jy;
float num=0, den=0;
if (color==R) {
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else{
                num+=rfun(imgI[py][px].red-imgI[jy][jx].red)/distance(py, px, jy, jx);
                den+=RMAX/distance(py, px, jy, jx);
            }
        }
    }
}
else if (color==B){
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else{
                num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)/distance(py, px, jy, jx);
                den+=RMAX/distance(py, px, jy, jx);
            }
        }
    }
}
else if (color==G){
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else{
                num+=rfun(imgI[py][px].green-imgI[jy][jx].green)/distance(py, px, jy, jx);
                den+=RMAX/distance(py, px, jy, jx);
            }
        }
    }
}

return num/den;

} `

This function is called in a loop for every pixel(px; py) so it's called a lot of times and it takes al lot of time to compute this. The rfun function is not optimizable, because is already really simple and fast. what I need to do is to make faster the distance function.

1)I tried hypotf but it was slower than the distance function

2)I increased the optimization settings of the compiler and that made the process 2x faster!

3) I tried with a macro #define DIST(x, y) sqrtf((float)((x)*(x)+(y)*(y))) but nothing changed (as I expected)

EDIT3:

In the end I found that the fastest way was to calculate all the possible distances and store them in an array before starting computing in a loop the normC function. To make it faster I calculated the inverses of the distances so that I could use the product and not the quotient:

float** DIST    
DIST=malloc(500*sizeof(float*)); //allocating memory for the 2d array
for (i=0; i<500; i++) {
    DIST[i]=malloc(500*sizeof(float));
}

for(i=0; i<500; i++){      //storing the inverses of the distances
    for (p=0; p<500; p++) {
        DIST[i][p]= 1.0/sqrtf((float)((i)*(i)+(p)*(p)));
    }
}
float normC(int py, int px, int color, pixel** imgI, int sizeY, int sizeX){

int jx=0, jy=0;
float num=0, den=0;
if (color==R) {
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else if (py>=jy && px>=jx){
                num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[py-jy][px-jx];
                den+=DIST[py-jy][px-jx];
            }
            else if (jy>py && px>=jx){
                num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[jy-py][px-jx];
                den+=DIST[jy-py][px-jx];
            }
            else if (jy>py && jx>px){
                num+=rfun(imgI[py][px].red-imgI[jy][jx].red)*DIST[jy-py][jx-px];
                den+=DIST[jy-py][jx-px];
            }
        }
    }
}
else if (color==B){
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else if (py>=jy && px>=jx){
                num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[py-jy][px-jx];
                den+=DIST[py-jy][px-jx];
            }
            else if (jy>py && px>=jx){
                num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[jy-py][px-jx];
                den+=DIST[jy-py][px-jx];
            }
            else if (jy>py && jx>px){
                num+=rfun(imgI[py][px].blue-imgI[jy][jx].blue)*DIST[jy-py][jx-px];
                den+=DIST[jy-py][jx-px];
            }
        }
    }
}
else if (color==G){
    for (jy=0; jy<sizeY; jy++) {
        for (jx=0; jx<sizeX; jx++) {
            if (jx==px && jy==py) {
                ;
            }
            else if (py>=jy && px>=jx){
                num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[py-jy][px-jx];
                den+=DIST[py-jy][px-jx];
            }
            else if (jy>py && px>=jx){
                num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[jy-py][px-jx];
                den+=DIST[jy-py][px-jx];
            }
            else if (jy>py && jx>px){
                num+=rfun(imgI[py][px].green-imgI[jy][jx].green)*DIST[jy-py][jx-px];
                den+=DIST[jy-py][jx-px];
            }
        }
    }
}

return num/(den*RMAX);
}
Pol
  • 103
  • 1
  • 2
  • 8
  • 3
    Try using [hypot](http://ideone.com/qgrrmo). – Sergey Kalinichenko Aug 21 '15 at 22:22
  • like dasblinkenlight said, `math.h` has function `hypotf(float x, float y)` – gengisdave Aug 21 '15 at 22:31
  • I'm sorry. It seems a little bit slow *to me. It may be the fastest way but I don't think so. – Pol Aug 21 '15 at 22:34
  • Thanks. I'm trying it right now. – Pol Aug 21 '15 at 22:36
  • 2
    Do really need the distance and not its square? For example if you are looking for minimum distance between sets of points, you can compare the square of the distance, taking one single square root when you have finished – Weather Vane Aug 21 '15 at 22:36
  • [If you really need the distance, not its square](http://stackoverflow.com/a/3506633/179910). – Jerry Coffin Aug 22 '15 at 01:40
  • To be clear, try `hypotf(px-jx,py-jy)` (note the `f`) – chux - Reinstate Monica Aug 22 '15 at 03:34
  • I have tried `hypotf(px-jx,py-jy)` , but it's slower than the distance function I have written. Thanks anyway. – Pol Aug 22 '15 at 10:38
  • Using those functions on a 1000*1000 array calculating the distance from the pixel[100][100] the distance function was almost 200 times faster! – Pol Aug 22 '15 at 10:43
  • Does your timing change when you increase your compiler's optimization settings? A lower setting may not optimize away the double function call to `distance`. – Jongware Aug 22 '15 at 10:43
  • 1
    The edit helps, but is still lacking the types used. Further, it is not the "Euclidean distance", but `x/Euclidean distance`" that needs optimization. And for the big picture, it is still unclear how `num, den` are used. Is not _that_ the higher level optimization potential? Example, Maybe do just 1 `sqrtf()` on `num/den` if that is the needed quotient elsewhere. – chux - Reinstate Monica Aug 22 '15 at 13:55
  • `(px-jx)*(px-jx)+(py-jy)*(py-jy)` hints that code is not worried about integer overflow. – chux - Reinstate Monica Aug 22 '15 at 13:57
  • The repeated `RMAX/` is of little value. Instead `den+=1.0/distance(py, px, jy, jx);` and do a `return num/(den*RMAX)` in the end. – chux - Reinstate Monica Aug 24 '15 at 18:28
  • 1
    Thanks chug but it took a little bit more if done like you suggested. Anyway I used your advice in an other way for the last version (EDIT3), so thanks! – Pol Aug 26 '15 at 14:08

6 Answers6

11

The square root is an expensive function. If you just care about how distances compare (is this distance less than this distance, equal, etc.), you don't need the square root.

It's basically the same reason why many Digital Signal Processing frameworks (X-Midas, Midas 2k, PicklingTools) have magnitude (which is basically the same distance computation for complex numbers) and magnitude2 (which elides the square root). Sometimes all you care about is how things compare, not necessarily the actual value.

rts1
  • 1,416
  • 13
  • 15
  • Also, if you can stay within ints (by getting rid of square root), then your function can return an int and avoid the expensive conversion from int to float as well. – rts1 Aug 21 '15 at 22:40
  • 2
    Another thought: if you are using C++, make this function inline. If you are using C, make it into a macro so you don'thave to pay for function call overhead. #define DIST2(x,y) (x*x)+(y*y) – rts1 Aug 21 '15 at 22:50
3

try something like

double dx = (jx-qx);
double dy = (jy-qy);
double dist = sqrt(dx*dx + dy*dy);

if you can live with just the square (which is useful when you just want to do something like sort by distance, you can use the much more efficient

double dx = (jx-qx);
double dy = (jy-qy);
double dist = dx*dx + dy*dy;
1

It depends on what you mean fast.

First, as other people suggested that you might adjust your requirement and live with distance squared in some situations.

Next, you would not benefit much on squeezing and extra cycle on the operation, but instead consider to speed up when you need to process huge amount of such operations - vectorization, optimizing memory layout, cache utilization, parallel computing, etc. But those topic require more understanding on specific program needs.

Non-maskable Interrupt
  • 3,841
  • 1
  • 19
  • 26
0

First, in your loop you call "distance" twice in a row, with the same argument:

{
  num += foo / distance(...);
  den += bar / distance(...);
}

Declaring a new variable "tmp" and changing above code to

{
  tmp = 1 / distance(...);
  num += foo * distance;
  den += bar * distance;
}

should speed up your code considerably, although this is probably the factor 2 speed increase you are seeing with more compiler optimization. (Please find out!) Also note that I swapped the divisions for multiplications - that is cheaper in terms of computing time as well.

Second, some compilers have optimizations for 1/x^2 and maybe even 1/sqrt(x). In order to allow your compiler to make use of such optimizations, don't calculate the distance, but the inverse distance. So you return statement should be

return 1 / sqrtf(...);

For many optimizations, the CPU you are running the code on is important, too. Especially so if you wish to vectorize (do you have SSE? AVX?) or parallelize (how many cores do you have available? Are they busy doing other stuff in the meantime or can you use them?).

It's hard to be sure without trying it, but it looks like you'd be better off vectorizing rather than parallelizing (running on separate cores -- the overhead will probably eat up any performance gains you might get).

Alex
  • 336
  • 3
  • 11
  • unfortunately it takes longer to compute the program with a temporary value than calling to times the function distance – Pol Aug 26 '15 at 13:34
  • That is quite surprising. Is that with or without optimization turned on? But I see from your edit3 that you found a solution that works, so it's probably not important any more. – Alex Aug 28 '15 at 17:59
  • Yes that's surprising. I thought too that using a temporary variable should have made the process faster but it doesn't. Without optimization. Thank you anyway! – Pol Aug 28 '15 at 18:25
0

if you are working on images you may use the Fast exact Euclidean distance (FEED) transforms. https://ieeexplore.ieee.org/abstract/document/6717981/

Ahmad Hassanat
  • 425
  • 4
  • 6
0

optimal loop

np=length(pixelsCand);
dc=zeros(np);


for(ir=1:np)
    for(ic=ir+1:np)
        %dc(ir,ir)=0
        dc(ir,ic)=sqrt(sum((pixelsCand(ir,:)-pixelsCand(ic,:)).^2));
        dc(ic,ir)= dc(ir,ic);
    end
end
user3452134
  • 362
  • 4
  • 11