Once I faced the same problem. The only difference was that I had to efficiently keep line segments in a list. It was for a Monte-Carlo simulation. And the newly randomly generated line segments had to be added to the existing sorted and merged line segments.
I adapted the algorithm to your problem using the answer by lunixbochs to convert IPs to integers.
This solution allows to add a new IP range to the existing list of already merged ranges (while other solutions rely on having the list-of-ranges-to-merge sorted and do not allow adding a new range to already merged range list). It's done in add_range
function by using bisect
module to find the place where to insert the new IP range and then deleting the redundant IP intervals and inserting the new range with adjusted boundaries so that the new range embraces all the deleted ranges.
import socket
import struct
import bisect
def ip2long(ip):
'''IP to integer'''
packed = socket.inet_aton(ip)
return struct.unpack("!L", packed)[0]
def long2ip(n):
'''integer to IP'''
unpacked = struct.pack('!L', n)
return socket.inet_ntoa(unpacked)
def get_ips(s):
'''Convert string IP interval to tuple with integer representations of boundary IPs
'1.1.1.1-7' -> (a,b)'''
s1,s2 = s.split('-')
if s2.isdigit():
s2 = s1[:-1] + s2
return (ip2long(s1),ip2long(s2))
def add_range(iv,R):
'''add new Range to already merged ranges inplace'''
left,right = get_ips(R)
#left,right are left and right boundaries of the Range respectively
#If this is the very first Range just add it to the list
if not iv:
iv.append((left,right))
return
#Searching the first interval with left_boundary < left range side
p = bisect.bisect_right(iv, (left,right)) #place after the needed interval
p -= 1 #calculating the number of interval basing on the position where the insertion is needed
#Interval: |----X----| (delete)
#Range: <--<--|----------| (extend)
#Detect if the left Range side is inside the found interval
if p >=0: #if p==-1 then there was no interval found
if iv[p][1]>= right:
#Detect if the Range is completely inside the interval
return #drop the Range; I think it will be a very common case
if iv[p][1] >= left-1:
left = iv[p][0] #extending the left Range interval
del iv[p] #deleting the interval from the interval list
p -= 1 #correcting index to keep the invariant
#Intervals: |----X----| |---X---| (delete)
#Range: |-----------------------------|
#Deleting all the intervals which are inside the Range interval
while True:
p += 1
if p >= len(iv) or iv[p][0] >= right or iv[p][1] > right:
'Stopping searching for the intervals which is inside the Range interval'
#there are no more intervals or
#the interval is to the right of the right Range side
# it's the next case (right Range side is inside the interval)
break
del iv[p] #delete the now redundant interval from the interval list
p -= 1 #correcting index to keep the invariant
#Interval: |--------X--------| (delete)
#Range: |-----------|-->--> (extend)
#Working the case when the right Range side is inside the interval
if p < len(iv) and iv[p][0] <= right-1:
#there is no condition for right interval side since
#this case would have already been worked in the previous block
right = iv[p][1] #extending the right Range side
del iv[p] #delete the now redundant interval from the interval list
#No p -= 1, so that p is no pointing to the beginning of the next interval
#which is the position of insertion
#Inserting the new interval to the list
iv.insert(p, (left,right))
def merge_ranges(ranges):
'''Merge the ranges'''
iv = []
for R in ranges:
add_range(iv,R)
return ['-'.join((long2ip(left),long2ip(right))) for left,right in iv]
ranges = ('1.1.1.1-7', '2.2.2.2-10', '3.3.3.3-3.3.3.3', '1.1.1.4-25', '2.2.2.4-6')
print(merge_ranges(ranges))
Output:
['1.1.1.1-1.1.1.25', '2.2.2.2-2.2.2.10', '3.3.3.3-3.3.3.3']
This was a lot of fun for me to code! Thank you for that :)