(EDITED): Here are a couple of ways of achieving this:
def remove_non_standard_buffer(items, template):
    """Yield the items that form complete, non-overlapping copies of ``template``.

    ``items`` is consumed one element at a time, so any iterable (including a
    one-shot iterator) is accepted.  A sliding window holding the most recent
    ``len(template)`` items is compared against the template after every
    item; on a match the window is emitted and then emptied, so two matches
    never share an item.

    Args:
        items: Iterable of values to scan.
        template: Sequence of values to look for.

    Yields:
        The matched items, one full copy of the template per match.  Nothing
        is yielded for an empty template.
    """
    pattern = list(template)
    len_template = len(pattern)
    if not len_template:
        # An empty template cannot be "found" anywhere; there is nothing to keep.
        return
    window = []
    for item in items:
        window.append(item)
        if len(window) > len_template:
            # Keep only the most recent ``len_template`` items.  Dropping just
            # the oldest one (instead of resetting the whole buffer) is what
            # lets templates with a repeated prefix, e.g. [0, 0, 1], be found
            # inside [0, 0, 0, 1].
            del window[0]
        if window == pattern:
            yield from window
            # Start afresh so that consecutive matches never overlap.
            window = []
def remove_non_standard_slicing(items, template):
    """Yield one copy of ``template`` per non-overlapping occurrence in ``items``.

    A window ``items[start:end]`` of the template's length is slid over
    ``items`` one position at a time and compared with ``template``.

    Args:
        items: Sliceable sequence to scan; its slices are compared with
            ``template`` using ``==``, so both should be of the same type.
        template: Sequence to look for.

    Yields:
        The elements of ``template``, once for every match.
    """
    start = 0
    end = len(template)
    # Index of the first position not covered by an already emitted match.
    # Windows starting before it overlap that match and must be ignored,
    # otherwise e.g. [0, 0, 0] with template [0, 0] would produce four items.
    first_free = 0
    for _ in items:
        if start >= first_free and items[start:end] == template:
            yield from template
            first_free = end
        end += 1
        start += 1
def remove_non_standard_for(items, template):
    """Yield one copy of ``template`` per non-overlapping occurrence in ``items``.

    Every index of ``items`` is visited and the slice of the template's
    length starting there is compared with ``template``.

    Args:
        items: Sliceable sequence to scan; its slices are compared with
            ``template`` using ``==``, so both should be of the same type.
        template: Sequence to look for.

    Yields:
        The elements of ``template``, once for every match.
    """
    len_template = len(template)
    # Positions before ``first_free`` belong to a match that was already
    # emitted; a window starting there would overlap it and duplicate items.
    first_free = 0
    for i, _ in enumerate(items):
        if i >= first_free and items[i:i + len_template] == template:
            yield from template
            first_free = i + len_template
def remove_non_standard_while(items, template):
    """Yield one copy of ``template`` per non-overlapping occurrence in ``items``.

    The scan index jumps past a whole match once one is found and advances by
    a single position otherwise, so only windows that can still fit in
    ``items`` are ever sliced.

    Args:
        items: Sliceable sequence with a length; its slices are compared with
            ``template`` using ``==``, so both should be of the same type.
        template: Sequence to look for.

    Yields:
        The elements of ``template``, once for every match.  Nothing is
        yielded for an empty template.
    """
    len_template = len(template)
    if not len_template:
        # With an empty template every window "matches" and the index would
        # advance by zero forever; bail out instead of looping endlessly.
        return
    last_start = len(items) - len_template
    i = 0
    while i <= last_start:
        if items[i:i + len_template] == template:
            yield from template
            i += len_template
        else:
            i += 1
def remove_non_standard_while_reverse(items, template):
    """Yield one copy of ``template`` per non-overlapping occurrence in ``items``.

    Each candidate window is compared element by element, starting from its
    last position and moving backwards, stopping at the first difference.

    Args:
        items: Indexable sequence with a length.
        template: Indexable sequence with a length to look for.

    Yields:
        The elements of ``template``, once for every match.  Nothing is
        yielded for an empty template.
    """
    len_template = len(template)
    if not len_template:
        # An empty template would never advance the index (infinite loop).
        return
    last_start = len(items) - len_template
    i = 0
    while i <= last_start:
        for j in range(len_template - 1, -1, -1):
            if items[i + j] != template[j]:
                # A mismatch at offset ``j`` says nothing about the windows
                # starting at i + 1 .. i + j, so only a shift by one is safe.
                # Skipping ``j + 1`` positions would miss, for instance,
                # [0, 1, 2, 3, 4] inside [9, 0, 1, 2, 3, 4].
                i += 1
                break
        else:
            # No mismatch found: the whole window equals the template.
            yield from template
            i += len_template
def remove_non_standard_count(items, template):
    """Return ``template`` repeated once per non-overlapping occurrence in ``items``.

    Instead of yielding every match, the matches are only counted and the
    result is built in one go with sequence repetition (``template * n``).

    Args:
        items: Sliceable sequence with a length; its slices are compared with
            ``template`` using ``==``, so both should be of the same type.
        template: Sequence to look for; it must support ``*`` with an int.

    Returns:
        ``template * n`` where ``n`` is the number of matches found (an empty
        repetition when there is no match or the template is empty).
    """
    len_template = len(template)
    if not len_template:
        # An empty template would never advance the index (infinite loop);
        # the only sensible result is an empty sequence of the same type.
        return template * 0
    n = 0
    i = 0
    last_start = len(items) - len_template
    while i <= last_start:
        if items[i:i + len_template] == template:
            n += 1
            i += len_template
        else:
            i += 1
    return template * n
def remove_non_standard_count_reverse(items, template):
    """Return ``template`` repeated once per non-overlapping occurrence in ``items``.

    Each candidate window is compared element by element from its last
    position backwards; matches are counted and the result is built with
    sequence repetition (``template * n``).

    Args:
        items: Indexable sequence with a length.
        template: Indexable sequence with a length to look for; it must
            support ``*`` with an int.

    Returns:
        ``template * n`` where ``n`` is the number of matches found (an empty
        repetition when there is no match or the template is empty).
    """
    len_template = len(template)
    if not len_template:
        # An empty template would never advance the index (infinite loop).
        return template * 0
    n = 0
    i = 0
    last_start = len(items) - len_template
    while i <= last_start:
        for j in range(len_template - 1, -1, -1):
            if items[i + j] != template[j]:
                # Only a shift by one is safe after a mismatch: skipping
                # ``j + 1`` positions can jump over a real occurrence, e.g.
                # [0, 1, 2, 3, 4] inside [9, 0, 1, 2, 3, 4].
                i += 1
                break
        else:
            # No mismatch found: the whole window equals the template.
            n += 1
            i += len_template
    return template * n
and testing it:
# Sample input: two complete [0, 1, 2, 3, 4] runs separated by the
# incomplete / stray items 0, 1, 2, 4, 0.
ll = [0, 1, 2, 3, 4, 0, 1, 2, 4, 0, 0, 1, 2, 3, 4]
# The generator-based variants are wrapped in list() to collect their output.
print(list(remove_non_standard_buffer(ll, [0, 1, 2, 3, 4])))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
print(list(remove_non_standard_slicing(ll, [0, 1, 2, 3, 4])))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
print(list(remove_non_standard_for(ll, [0, 1, 2, 3, 4])))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
print(list(remove_non_standard_while(ll, [0, 1, 2, 3, 4])))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
print(list(remove_non_standard_while_reverse(ll, [0, 1, 2, 3, 4])))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
# The _count variants return a list directly, so no list() is needed.
print(remove_non_standard_count(ll, [0, 1, 2, 3, 4]))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
print(remove_non_standard_count_reverse(ll, [0, 1, 2, 3, 4]))
# [0, 1, 2, 3, 4, 0, 1, 2, 3, 4]
with the respective timings:
# IPython session (``%timeit`` is an IPython magic, not plain Python).
# Each ``#`` line is the output recorded for the command directly above it.
# The generator-based variants are wrapped in list() so that they are fully
# consumed; the _count variants already return the finished result.
%timeit list(remove_non_standard_buffer(ll * 1000, [0, 1, 2, 3, 4]))
# 100 loops, best of 3: 3.35 ms per loop
%timeit list(remove_non_standard_slicing(ll * 1000, [0, 1, 2, 3, 4]))
# 100 loops, best of 3: 3.35 ms per loop
%timeit list(remove_non_standard_for(ll * 1000, [0, 1, 2, 3, 4]))
# 100 loops, best of 3: 3.19 ms per loop
%timeit list(remove_non_standard_while(ll * 1000, [0, 1, 2, 3, 4]))
# 100 loops, best of 3: 2.29 ms per loop
%timeit list(remove_non_standard_while_reverse(ll * 1000, [0, 1, 2, 3, 4]))
# 100 loops, best of 3: 2.52 ms per loop
%timeit remove_non_standard_count(ll * 1000, [0, 1, 2, 3, 4])
# 100 loops, best of 3: 1.85 ms per loop
%timeit remove_non_standard_count_reverse(ll * 1000, [0, 1, 2, 3, 4])
# 100 loops, best of 3: 2.13 ms per loop
remove_non_standard_slicing()
uses substantially the same approach as @EliasKnudsen's answer, but the approach with the remove_non_standard_while()
is considerably faster.
remove_non_standard_while_reverse()
is even more efficient, but pays the price of the relatively inefficient explicit looping in Python.
Instead, the _count
solutions are somewhat over-optimized, list-specific
versions of the while
approach: they take advantage of the faster list
multiplication operation (and are therefore probably less useful for pandas
dataframes).