I have a sparse array of length 64, encoded with a 64-bit integer binaryPattern
.
For example:
binaryPattern = 0b0000000000000000000000000000000000000000000000000000000000001010
represents this array:
[0, value1, 0, value2, 0, 0, 0,,,,]
I need to recover the indexes of valueX
, from binaryPattern
as fast as possible.
I wrote various function attempts named getIndexesXXX()
, but those functions, apart from being slow themselves, have performances that change depending on conditions like the percentage of zeros, or if the ones are at the start or at the end of the binaryPattern
I'm using Python 3.10, so I have access to the functions like (int).bit_length().
Note that performance changes between debugging and running without debugging. What matters is when non debugging.
This is the code I have:
binaryPattern
written in binary, is 1 when the array is non-zero, and is 0 when the array is zero.
getIndexes()
is the naivest attempt: it checks the bits one by one. It is faster when a few bits are clumped together at the start.
getIndexes2()
and getIndexes2_5()
check which is the first nonzero bit, and avoid checking zeros unnecessarily.
getIndexes3()
uses numpy to calculate all the bit shifts. Is brute force, but is faster on some cases, like when there is a high percentage of ones.
getIndexes4()
is similar to getIndexes3()
, but uses list comprehension to find the non zero bits.
from numpy.random import default_rng
import time
import numpy as np
class sparseArray():
_indexes = np.arange(64, dtype=np.uint64)
_weights = 2**_indexes
_dtypesInt = [np.uint8, np.uint16, np.uint32, np.uint64]
def __init__(self, values: np.array, indexes: np.array) -> None:
self.array = values
self.binaryPattern = int(np.sum(self._weights[indexes]))
# print(bin(self.binaryPattern)[2:])
def getIndexes(self):
answer = np.zeros(len(self.array), dtype=np.uint8)
binaryMask = self.binaryPattern
n = 0
index = 0
while binaryMask > 0:
if binaryMask & 1:
answer[n] = index
n += 1
binaryMask = binaryMask >> 1
index += 1
return answer
def getIndexes2(self):
answer = np.zeros(len(self.array), dtype=np.uint8)
binaryMask = int(self.binaryPattern)
n = 0
index = -1
while binaryMask > 0:
# isolates first nonzero bit
first1 = binaryMask & ~(binaryMask - 1)
index += first1.bit_length()
answer[n] = index
n += 1
binaryMask //= 2*first1
return answer
not1 = ~np.array([1], dtype=np.uint64)[0]
def getIndexes2_5(self):
answer = np.zeros(len(self.array), dtype=np.uint8)
binaryMask = int(self.binaryPattern)
indexes = self._indexes
n = 0
index = -1
while binaryMask > 0:
# isolates first zero bit
first0 = ~(binaryMask | (~binaryMask - 1))
# isolates first nonzero bit
first1 = binaryMask & ~(binaryMask - 1)
# if the sequence of 0 is larger than the sequence of 1...
if first1*4 > first0:
index += first1.bit_length()
answer[n] = index
n += 1
binaryMask //= 2*first1
else:
length1 = first0.bit_length()-1
index += 1
answer[n:n+length1] = indexes[index:index+length1]
index += length1-1
n += length1
binaryMask //= first0
return answer
def getIndexes3(self):
bitshift = ((self.binaryPattern//self._weights) & 1) == 1
return self._indexes[bitshift]
def getIndexes3_5(self):
lengthBP = self.binaryPattern.bit_length()
# creates a boolean array=True on bits=1
bitshift = ((self.binaryPattern//self._weights[:lengthBP]) & 1) == 1
return self._indexes[:lengthBP][bitshift]
def getIndexes4(self):
i = [x == "1" for x in bin(x.binaryPattern)[-1:1:-1]]
return self._indexes[:len(i)][i]
rng = default_rng()
length = 32 #how many ones will have the binaryPattern
indexes = np.sort(rng.choice(64, size=length, replace=False))
#getIndexes is faster for this type of data, which may be frequent:
#x = sparseArray(np.array([1, 4, 8, 10, 10]), indexes=np.array([0, 2, 4, 5, 7]))
x = sparseArray(np.arange(length), indexes=indexes)
print(bin(x.binaryPattern)[2:])
print(x.getIndexes())
print(x.getIndexes2())
print(x.getIndexes2_5())
print(x.getIndexes3())
print(x.getIndexes3_5())
print(x.getIndexes4())
start = time.time()
for _ in range(100000):
x.getIndexes()
print(f"time for getIndexes: {time.time()-start:.2f}")
start = time.time()
for _ in range(100000):
x.getIndexes2()
print(f"time for getIndexes2: {time.time()-start:.2f}")
start = time.time()
for _ in range(100000):
x.getIndexes2_5()
print(f"time for getIndexes2_5: {time.time()-start:.2f}")
start = time.time()
for _ in range(100000):
x.getIndexes3()
print(f"time for getIndexes3: {time.time()-start:.2f}")
start = time.time()
for _ in range(100000):
x.getIndexes3_5()
print(f"time for getIndexes3_5: {time.time()-start:.2f}")
start = time.time()
for _ in range(100000):
x.getIndexes4()
print(f"time for getIndexes4: {time.time()-start:.2f}")