3

The below code has two functions that does the same thing: checks to see if the line between two points intersects with a circle.


from line_profiler import LineProfiler
from math import sqrt
import numpy as np


class Point:
    x: float
    y: float

    def __init__(self, x: float, y: float):
        self.x = x
        self.y = y

    def __repr__(self):
        return f"Point(x={self.x}, y={self.y})"


class Circle:
    ctr: Point
    r: float

    def __init__(self, ctr: Point, r: float):
        self.ctr = ctr
        self.r = r

    def __repr__(self):
        return f"Circle(r={self.r}, ctr={self.ctr})"


def loop(p1: Point, p2: Point, circles: list[Circle]):
    m = (p1.y - p2.y) / (p1.x - p2.x)
    n = p1.y - m * p1.x

    max_x = max(p1.x, p2.x)
    min_x = min(p1.x, p2.x)

    for circle in circles:
        if sqrt((circle.ctr.x - p1.x) ** 2 + (circle.ctr.y - p1.y) ** 2) < circle.r \
                or sqrt((circle.ctr.x - p2.x) ** 2 + (circle.ctr.y - p2.y) ** 2) < circle.r:
            return False

        a = m ** 2 + 1
        b = 2 * (m * n - m * circle.ctr.y - circle.ctr.x)
        c = circle.ctr.x ** 2 + circle.ctr.y ** 2 + n ** 2 - circle.r ** 2 - 2 * n * circle.ctr.y

        # compute the intersection points
        discriminant = b ** 2 - 4 * a * c
        if discriminant <= 0:
            # no real roots, the line does not intersect the circle
            continue

        # two real roots, the line intersects the circle at two points
        x1 = (-b + sqrt(discriminant)) / (2 * a)
        x2 = (-b - sqrt(discriminant)) / (2 * a)

        # check if both points in range
        first = min_x <= x1 <= max_x
        second = min_x <= x2 <= max_x
        if first and second:
            return False

    return True


def vectorized(p1: Point, p2: Point, circles):
    m = (p1.y - p2.y) / (p1.x - p2.x)
    n = p1.y - m * p1.x

    max_x = max(p1.x, p2.x)
    min_x = min(p1.x, p2.x)

    circle_ctr_x = circles['x']
    circle_ctr_y = circles['y']
    circle_radius = circles['r']

    # Pt 1 inside circle
    if np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius):
        return False
    # Pt 2 inside circle
    if np.any(np.sqrt((circle_ctr_x - p2.x) ** 2 + (circle_ctr_y - p2.y) ** 2) < circle_radius):
        return False
    # Line intersects with circle in range
    a = m ** 2 + 1
    b = 2 * (m * n - m * circle_ctr_y - circle_ctr_x)
    c = circle_ctr_x ** 2 + circle_ctr_y ** 2 + n ** 2 - circle_radius ** 2 - 2 * n * circle_ctr_y

    # compute the intersection points
    discriminant = b**2 - 4*a*c
    discriminant_bigger_than_zero = discriminant > 0
    discriminant = discriminant[discriminant_bigger_than_zero]

    if discriminant.size == 0:
        return True

    b = b[discriminant_bigger_than_zero]

    # two real roots, the line intersects the circle at two points
    x1 = (-b + np.sqrt(discriminant)) / (2 * a)
    x2 = (-b - np.sqrt(discriminant)) / (2 * a)

    # check if both points in range
    in_range = (min_x <= x1) & (x1 <= max_x) & (min_x <= x2) & (x2 <= max_x)
    return not np.any(in_range)


a = Point(x=-2.47496075130008, y=1.3609840363748935)
b = Point(x=3.4637947060471084, y=-3.7779123453298817)
c = [Circle(r=1.2587063082677084, ctr=Point(x=3.618533781361757, y=2.179925931180058)), Circle(r=0.7625751871124099, ctr=Point(x=-0.3173290200183132, y=4.256206636932641)), Circle(r=0.4926043225930364, ctr=Point(x=-4.626312261120341, y=-1.5754603504419196)), Circle(r=0.6026364956540792, ctr=Point(x=3.775240278691819, y=1.7381168262343072)), Circle(r=1.2804597877349562, ctr=Point(x=4.403273380178893, y=-1.6890127555343681)), Circle(r=1.1562415624767421, ctr=Point(x=-1.0675000352105801, y=-0.23952113329203994)), Circle(r=1.112718432321835, ctr=Point(x=2.500137075066017, y=-2.77748519509295)), Circle(r=0.979889574640609, ctr=Point(x=4.494971251199753, y=-1.0530995423779388)), Circle(r=0.7817624050358268, ctr=Point(x=3.2419454348696544, y=4.3303373486692465)), Circle(r=1.0271176198616367, ctr=Point(x=-0.9740272820753071, y=-4.282195116754338)), Circle(r=1.1585218836700681, ctr=Point(x=-0.42096876790888915, y=2.135161027254492)), Circle(r=1.0242603387003988, ctr=Point(x=2.2617850544260767, y=-4.59942951839469)), Circle(r=1.5704233297828027, ctr=Point(x=-1.1182365440831088, y=4.2411408333943506)), Circle(r=0.37137272043983655, ctr=Point(x=3.280499587987774, y=-4.87871834733383)), Circle(r=1.1829610109115543, ctr=Point(x=-0.27755604766113606, y=-3.68429580935016)), Circle(r=1.0993567600839198, ctr=Point(x=0.23602306761027925, y=0.47530122196024704)), Circle(r=1.3865045367147553, ctr=Point(x=-2.537565761732492, y=4.719766182202855)), Circle(r=0.9492796511909753, ctr=Point(x=-3.7047245796551973, y=-2.501817905967274)), Circle(r=0.9866916911482386, ctr=Point(x=1.3021813533479742, y=4.754952371169189)), Circle(r=0.9053004331885084, ctr=Point(x=-3.4912157984801784, y=-0.5269727600532836)), Circle(r=1.3058987272565075, ctr=Point(x=-1.6983878085276427, y=-2.2910189455221053)), Circle(r=0.5342716756987732, ctr=Point(x=4.948676886704507, y=-1.2467089784975183)), Circle(r=1.0603926633240575, ctr=Point(x=-4.390462974765324, y=0.785568745976325)), Circle(r=0.3448422804513971, ctr=Point(x=-1.6459756952994697, y=2.7608629057950362)), Circle(r=0.8521457455807724, ctr=Point(x=-4.503217369041699, y=3.93796926957188)), Circle(r=0.602438849989669, ctr=Point(x=-2.0703406576157493, y=0.6142570312870999)), Circle(r=0.6453692950682722, ctr=Point(x=-0.14802220452893144, y=4.08189682338989)), Circle(r=0.6983361689325062, ctr=Point(x=0.09362196694661651, y=-1.0953438275586391)), Circle(r=1.880331563921456, ctr=Point(x=0.23481661751521776, y=-4.09217120864087)), Circle(r=0.5766225363413416, ctr=Point(x=3.149434524126505, y=-4.639582956406762)), Circle(r=0.6177559628867022, ctr=Point(x=-1.6758918144661683, y=-0.7954935787503492)), Circle(r=0.7347952666955615, ctr=Point(x=-3.1907522890427575, y=0.7048509241855683)), Circle(r=1.2795003337464894, ctr=Point(x=-1.777244415863577, y=2.936422879898364)), Circle(r=0.9181024765780231, ctr=Point(x=4.212544425778317, y=-1.953546993038261)), Circle(r=1.7681384709020282, ctr=Point(x=-1.3702722387909405, y=-1.7013020424154368)), Circle(r=0.5420789771729688, ctr=Point(x=4.063803796292818, y=-3.7159871611415065)), Circle(r=1.3863651881788939, ctr=Point(x=0.7685002210812408, y=-3.994230705171357)), Circle(r=0.5739750223225826, ctr=Point(x=0.08779554290638258, y=4.879912451441914)), Circle(r=1.2019825386919343, ctr=Point(x=-4.206623233886995, y=-1.1617382464768689))]

circle_dt = np.dtype('float,float,float')
circle_dt.names = ['x', 'y', 'r']
np_c = np.array([(x.ctr.x, x.ctr.y, x.r) for x in c], dtype=circle_dt)


lp1 = LineProfiler()
loop_wrapper = lp1(loop)
loop_wrapper(a, b, c)
lp1.print_stats()

lp2 = LineProfiler()
vectorized_wrapper = lp2(vectorized)
vectorized_wrapper(a, b, np_c)
lp2.print_stats()

One implementation is regular for loop implementation, and the other is vectorized implementation with numpy. From my small knowledge of vectorization, I would have guessed that the vectorized function would yield better result, but as you can see below that is not the case:

Total time: 4.36e-05 s
Function: loop at line 31

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    31                                           def loop(p1: Point, p2: Point, circles: list[Circle]):
    32         1          9.0      9.0      2.1      m = (p1.y - p2.y) / (p1.x - p2.x)
    33         1          5.0      5.0      1.1      n = p1.y - m * p1.x
    34                                           
    35         1         19.0     19.0      4.4      max_x = max(p1.x, p2.x)
    36         1          5.0      5.0      1.1      min_x = min(p1.x, p2.x)
    37                                           
    38         6         30.0      5.0      6.9      for circle in circles:
    39         6         73.0     12.2     16.7          if sqrt((circle.ctr.x - p1.x) ** 2 + (circle.ctr.y - p1.y) ** 2) < circle.r \
    40         6         62.0     10.3     14.2                  or sqrt((circle.ctr.x - p2.x) ** 2 + (circle.ctr.y - p2.y) ** 2) < circle.r:
    41                                                       return False
    42                                           
    43         6         29.0      4.8      6.7          a = m ** 2 + 1
    44         6         32.0      5.3      7.3          b = 2 * (m * n - m * circle.ctr.y - circle.ctr.x)
    45         6         82.0     13.7     18.8          c = circle.ctr.x ** 2 + circle.ctr.y ** 2 + n ** 2 - circle.r ** 2 - 2 * n * circle.ctr.y
    46                                           
    47                                                   # compute the intersection points
    48         6         33.0      5.5      7.6          discriminant = b ** 2 - 4 * a * c
    49         5         11.0      2.2      2.5          if discriminant <= 0:
    50                                                       # no real roots, the line does not intersect the circle
    51         5         22.0      4.4      5.0              continue
    52                                           
    53                                                   # two real roots, the line intersects the circle at two points
    54         1          7.0      7.0      1.6          x1 = (-b + sqrt(discriminant)) / (2 * a)
    55         1          4.0      4.0      0.9          x2 = (-b - sqrt(discriminant)) / (2 * a)
    56                                           
    57                                                   # check if one point in range
    58         1          5.0      5.0      1.1          first = min_x < x1 < max_x
    59         1          3.0      3.0      0.7          second = min_x < x2 < max_x
    60         1          2.0      2.0      0.5          if first and second:
    61         1          3.0      3.0      0.7              return False
    62                                           
    63                                                   return True

Total time: 0.0001534 s
Function: vectorized at line 66

Line #      Hits         Time  Per Hit   % Time  Line Contents
==============================================================
    66                                           def vectorized(p1: Point, p2: Point, circles):
    67         1         10.0     10.0      0.7      m = (p1.y - p2.y) / (p1.x - p2.x)
    68         1          5.0      5.0      0.3      n = p1.y - m * p1.x
    69                                           
    70         1          7.0      7.0      0.5      max_x = max(p1.x, p2.x)
    71         1          4.0      4.0      0.3      min_x = min(p1.x, p2.x)
    72                                           
    73         1         10.0     10.0      0.7      circle_ctr_x = circles['x']
    74         1          3.0      3.0      0.2      circle_ctr_y = circles['y']
    75         1          3.0      3.0      0.2      circle_radius = circles['r']
    76                                           
    77                                               # Pt 1 inside circle
    78         1        652.0    652.0     42.5      if np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius):
    79                                                   return False
    80                                               # Pt 2 inside circle
    81         1        161.0    161.0     10.5      if np.any(np.sqrt((circle_ctr_x - p2.x) ** 2 + (circle_ctr_y - p2.y) ** 2) < circle_radius):
    82                                                   return False
    83                                               # Line intersects with circle in range
    84         1         13.0     13.0      0.8      a = m ** 2 + 1
    85         1        120.0    120.0      7.8      b = 2 * (m * n - m * circle_ctr_y - circle_ctr_x)
    86         1         77.0     77.0      5.0      c = circle_ctr_x ** 2 + circle_ctr_y ** 2 + n ** 2 - circle_radius ** 2 - 2 * n * circle_ctr_y
    87                                           
    88                                               # compute the intersection points
    89         1         25.0     25.0      1.6      discriminant = b**2 - 4*a*c
    90         1         46.0     46.0      3.0      discriminant_bigger_than_zero = discriminant > 0
    91         1         56.0     56.0      3.7      discriminant = discriminant[discriminant_bigger_than_zero]
    92                                           
    93         1          6.0      6.0      0.4      if discriminant.size == 0:
    94                                                   return True
    95                                           
    96         1         12.0     12.0      0.8      b = b[discriminant_bigger_than_zero]
    97                                           
    98                                               # two real roots, the line intersects the circle at two points
    99         1         77.0     77.0      5.0      x1 = (-b + np.sqrt(discriminant)) / (2 * a)
   100         1         28.0     28.0      1.8      x2 = (-b - np.sqrt(discriminant)) / (2 * a)
   101                                           
   102                                               # check if both points in range
   103         1         96.0     96.0      6.3      in_range = (min_x <= x1) & (x1 <= max_x) & (min_x <= x2) & (x2 <= max_x)
   104         1        123.0    123.0      8.0      return not np.any(in_range)

For some reason the non vectorized function runs faster.

My simple guess is that it is because the vectorized function runs over the whole array every time and the non vectorized one stops in the middle when it find a circle intersections.

So my questions are:

  1. Is there a numpy function which doesn't iterate over the whole array but stops when the results are false?
  2. What is the reason the vectorized function takes longer to run?
  3. Any general optimization suggestions would be appreciated
Idan
  • 135
  • 1
  • 7
  • 1
    In this particular example the iteration stops on circle number 6 out of 39. For the first five, it skips part of the loop because `discriminant <= 0` is `True`. The sixth one ends the iteration because `first and second` is `True`. Note that to determine this, all the calculations for the circle have been done. Similarly the vectorized variant has to go through all the steps in this case, but processing all the circles in each step. Although you make some attempts to shortcut, none of them actually help here, since at least one circle always remains, and you never reduce the working set. – Dan Mašek Dec 11 '22 at 00:25
  • 1
    You're probably also paying a lot of overhead with allocations of intermediates, and some parts of the algorithm could be optimized. For good testing the sample size of 39 is way to small -- make it 100000 (just repeat the same few things over and over). Make the test more fair, by making a scenario where only the last one succeeds. Or the middle one. – Dan Mašek Dec 11 '22 at 00:28
  • You also have a bug in there, if I remove 6th and 7th circle from the input, `loop` returns `True`, yet `vectorized` returns `False`. Those two functions don't do the same thing. | I just noticed you do reduce the working set after the discriminant test, but it's too little too late. – Dan Mašek Dec 11 '22 at 00:42
  • For one, your "# check if one point in range" isn't the same -- in `loop` you use `<`, but in `vectorized` you use `<=`. Ouch, this is becoming less and less interesting. You really need to make sure you're comparing apples with apples, for benchmarking to have any meaning. – Dan Mašek Dec 11 '22 at 01:12
  • Ok, the bug is in the indentation of the `return True` in `loop`. Move it outside the loop. Might be a typo. – Dan Mašek Dec 11 '22 at 01:18
  • One more thing, while line profiler is a useful tool to get insight into what takes up most of the time, it's not so good for comparing the overall runtime of two functions. It imposes noticeable overhead the more python statements you execute, so once no shortcuts can be taken, and the input size gets big enough, I easily see 6x worse times for the loop implementation when comparing with and without line profiler. – Dan Mašek Dec 11 '22 at 01:41
  • @DanMašek hey, sorry, they were both typos. I extracted those functions from some bigger snippets of code. Is there any way to do the same with numpy? (Stopping mid loop instead of going through the whole array) – Idan Dec 11 '22 at 10:33
  • Also, this should run on a small amount of circles (in the tens) hence why the circle amount is around that size. I'm sure it would be different as the sample size grows – Idan Dec 11 '22 at 10:36

1 Answers1

3

Is there a numpy function which doesn't iterate over the whole array but stops when the results are false?

No. This is a long standing feature requested by Numpy users but it will certainly never be added to Numpy. For simple cases, like returning the first index of a boolean array, Numpy could implement that, but the thing is the boolean array needs to be fully created in the first place. In order to support the general case, Numpy should merge multiple operations and do some kind of lazy computation. This basically means rewriting completely Numpy from scratch for an efficient implementation (which is a huge work).

If you need to do that, there are two main solution:

  • operating on chunks so for the computation to stop early (while computing up to len(chunk) additional items);
  • writing your own fast compiled implementation using Numba or Cython (with views).

What is the reason the vectorized function takes longer to run?

The input is pretty small and Numpy is not optimized for small arrays. Indeed, each call to a Numpy function typically takes 0.4-4 us on a mainstream processor (like my i5-9600KF). This is because Numpy as many checks to do, new arrays to allocates, generic internal iterators to build, etc. As a result, a line like np.any(np.sqrt((circle_ctr_x - p1.x) ** 2 + (circle_ctr_y - p1.y) ** 2) < circle_radius) doing 8 Numpy calls and creating 7 temporary arrays takes about 8 us on my machine. The second similar line takes the same time. Together, they are already slower than the non-vectorized version.

As pointed out in the question and the comments, the non-vectorized function can stop early and this can also help the non-vectorized version to be even faster than the other.

Any general optimization suggestions would be appreciated

Regarding your code, using Numba (with plain loops and Numpy arrays) is certainly a good idea for performance. Note the first call can be slower due to the compilation time (you can provide the signature to do this at loading time or just use an AOT compiler including Cython).

Note that array of structure are generally not efficient since they prevent the efficient use of SIMD instructions. They are also certainly not efficiently computed by Numpy since the datatype is dynamically created and the Numpy code is already compiled ahead of time (so it cannot implement function for this specific datatype and has to use generic dynamic operation on each item of the array which is significantly slower than basic datatypes). Please consider using structure of arrays. For more information please read this post and more generally this post.

Jérôme Richard
  • 41,678
  • 6
  • 29
  • 59