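A generator-based version, for use when pred is expensive to compute
and you want to work on arbitrary iterables rather than only on sequences.
It does not hold all values in memory at once (only items still waiting to be
consumed by the other generator are buffered), and it runs pred exactly once per item.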
from collections import deque
from typing import Callable, TypeVar, Iterable
_T = TypeVar('_T')
def iter_split(pred: Callable[[_T], bool],
               iterable: Iterable[_T]) -> tuple[Iterable[_T], Iterable[_T]]:
    """Lazily partition ``iterable`` into two streams using ``pred``.

    Both returned generators pull from one shared iterator.  Whenever one
    side encounters an item that belongs to the other side, it parks that
    item in the other side's buffer, so ``pred`` is evaluated exactly once
    per item and relative order is preserved within each stream.

    Args:
        pred: Predicate deciding which stream an item belongs to.
        iterable: Source of items; consumed lazily, as the results are read.

    Returns:
        A ``(matching, non_matching)`` pair: the first yields the items for
        which ``pred`` was truthy, the second yields the remaining items.
    """
    source = iter(iterable)
    # Items already classified but not yet consumed by their own stream.
    accepted: deque[_T] = deque()
    rejected: deque[_T] = deque()

    def stream(wanted: bool,
               own: deque[_T],
               other: deque[_T]) -> Iterable[_T]:
        # One side of the split: ``wanted`` is the predicate outcome this
        # side keeps, ``own`` its backlog, ``other`` the sibling's backlog.
        while True:
            # Serve anything the sibling stream has set aside for us first.
            while own:
                yield own.popleft()
            for element in source:
                if bool(pred(element)) is wanted:
                    yield element
                    # The sibling may have run while we were suspended, so
                    # go back and re-check our backlog before pulling more.
                    break
                other.append(element)
            else:
                # Source exhausted and nothing was buffered for us: done.
                return

    return stream(True, accepted, rejected), stream(False, rejected, accepted)
A thread-safe example of true_generator()
(the lock is shared between both generators):
lock = RLock()  # RLock (from threading) in case pred uses a generator.


def true_generator():
    """Thread-safe variant of the True-side generator.

    Uses ``pred``, ``iterator``, ``true_values`` and ``false_values`` from
    the enclosing scope, plus ``lock``, which must be the same lock object
    used by the False-side generator.

    The lock is held only while the next value is being located (popping
    the backlog or advancing the shared iterator) and is always released
    before control is handed back to the consumer at ``yield``.  Using
    ``with lock`` guarantees the release even if ``pred`` or the underlying
    iterator raises, so a failure on this side cannot leave the other side
    blocked forever.

    Yields:
        Items for which ``pred`` returned a truthy value, in source order.
    """
    while True:
        with lock:
            if true_values:
                # Pop while still holding the lock so the backlog is never
                # touched outside the critical section.
                value = true_values.popleft()
            else:
                for item in iterator:
                    if pred(item):
                        value = item
                        break
                    false_values.append(item)
                else:
                    # Iterator exhausted and no backlog: finish.  Leaving
                    # the ``with`` block releases the lock.
                    return
        # Yield outside the lock so the consumer never runs while it is held.
        yield value