I want to progressively filter values from one list into sublists. Each time a condition matches, I want ignore that value for the next filter.
For example, let's say I want to a) grab items divisible by 3, b) grab odd items and c) keep the rest.
li = [0,1,2,3,4,5,6,7,8,9]
I want to get:
divby3 = [3,6,9]
odd = [1,5,7]
rest =[0,2,4,8]
Is there something in itertools to do this with? I wrote some test code, but it looks like something probably already exists. The most expressive, and amongst the quickest was:
def append_split(li,cond):
""" list comp appends to 2 separate lists """
hits, miss = [],[]
[hits.append(v) if cond(v) else miss.append(v) for v in li]
return hits, miss
p1_divby3, li = append_split(li, is_3)
p2_odd, p3_rest = append_split(li, is_odd)
or its suggested alternative:
def looped_append(li, cond):
""" for-loop to avoid side-effects within list comp """
hits, miss = [],[]
for v in li:
(hits if cond(v) else miss).append(v)
return hits, miss
Is there a better way in the standard library?
The performance I got (on a 10000 item list) was as follows:
timings:
0.00301504 by_filter_prune_set
0.00335598 by_append_split
0.00498891 by_tupling
0.56877589 by_memberships
Full test code:
import sys
from time import time
if len(sys.argv) >=2 :
li = range(0,int(sys.argv[1]))
do_compare = False
else:
li = [0,1,2,3,4,5,6,7,8,9]
do_compare = True
exp = dict(
p1_divby3 = [3,6,9],
p2_odd = [1,5,7],
p3_rest =[0,2,4,8],
)
def is_3(v):
return v and not (v % 3)
def is_odd(v):
return bool(v % 2)
def get_result(di):
return {k:v for k,v in sorted(di.items()) if k in exp}
def by_memberships(li):
""" SLOWEST. filter checks that item wasn't previously extracted """
p1_divby3 = [v for v in li if is_3(v)]
p2_odd = [v for v in li if is_odd(v) and not v in p1_divby3]
p3_rest = [v for v in li if not v in p1_divby3 and not v in p2_odd]
return get_result(locals())
def prune_set(candidates, seen):
""" filter, then prune found from list."""
seen = set(seen) #really slow if you dont cast to a set
return [v for v in candidates if not v in seen]
def by_filter_prune_set(li):
p1_divby3 = [v for v in li if is_3(v)]
li = prune_set(li, p1_divby3)
p2_odd = [v for v in li if is_odd(v)]
p3_rest = prune_set(li, p2_odd)
return get_result(locals())
def looped_append(li, cond):
# from comments, also slighty faster than append_split
hits, miss = [],[]
for v in li:
(hits if cond(v) else miss).append(v)
return hits, miss
def by_looped_append(li):
p1_divby3, li = looped_append(li, is_3)
p2_odd, p3_rest = looped_append(li, is_odd)
return get_result(locals())
def append_split(li,cond):
""" list comp appends to 2 separate lists """
hits, miss = [],[]
[hits.append(v) if cond(v) else miss.append(v) for v in li]
return hits, miss
def by_append_split(li):
p1_divby3, li = append_split(li, is_3)
p2_odd, p3_rest = append_split(li, is_odd)
return get_result(locals())
def split_tupling(li, cond):
""" put into a (hit, miss) tuple then re-filter into 2 lists"""
undefined = NotImplemented
li = [(v, undefined) if cond(v) else (undefined, v) for v in li ]
hits = [v[0] for v in li if v[0] is not undefined]
miss = [v[1] for v in li if v[0] is undefined]
return hits, miss
def by_tupling(li):
p1_divby3, li = split_tupling(li, is_3)
p2_odd, p3_rest = split_tupling(li, is_odd)
return get_result(locals())
timings = {}
for fn in [by_memberships, by_looped_append, by_append_split, by_tupling, by_filter_prune_set]:
sys.stdout.write(f"\n\n{fn.__name__:20.20}")
start = time()
got = fn(li)
duration = time()-start
sys.stdout.write(f" {duration:10.8f}\n")
timings[fn.__name__] = duration
if do_compare:
if got == exp:
flag = "✅"
else:
flag = "❌"
print(f"{flag}{exp=}\n{flag}{got=}")
li = sorted([(v,k) for k,v in timings.items()])
print("\n\ntimings:")
[print(f"{tu[0]:010.8f} {tu[1]}") for tu in li]