Thank you for your comments and ideas I created the following solution witch fits to my use case.
class MutableIterator():
__slots__=('_chain','_iterator','_depth')
def __init__(self,*iterators):
self._chain=itertools.chain
self._depth=0
s=len(iterators)
if s==0:
self._iterator=iter(()) #empty iterator
elif s==1:
self._iterator =iter(iterators[0])
else:
self._iterator=self._chain(*iterators)
def __next__(self):
yield next(self._iterator)
def __iter__(self):
try:
while 1:
yield next(self._iterator)
except StopIteration:
pass
def append(self,iterator):
if self._depth>20000:
# maximum depth of c-level recursions possible we must consume here the iterator
self._iterator=self._chain(list(self._iterator),iter(iterator))
self._depth=0
else:
self._iterator = self._chain(self._iterator, iter(iterator))
self._depth +=1
def appendleft(self,iterator):
if self._depth>20000:
# maximum depth of c-level recursions possible we must consume here the iterator
self._iterator = self._chain(iterator, list(self._iterator))
self._depth=0
else:
self._iterator = self._chain(iterator, self._iterator)
self._depth +=1
E.g. it delivers the output:
a=[[1,2,3],[4,5,6]]
my_iterator=MutableIterator(a)
for i in my_iterator:
if type(i) is list:
my_iterator.appendleft(iter(i))
print(i,end=', ')
[1, 2, 3], 1, 2, 3, [4, 5, 6], 4, 5, 6,
Even that it works I'm not 100% satisfied.
- It would be nice to have a build-in solution for this problem
- In fact in
__iter__()
I replace the for loop by a while loop which is not so nice from my point of few.
- From time to time the iterator must be consumed internally to avoid recursion erros in c-level of python (I must remark that in my use case depth > 1000 will not come up). But with this code the depth is unlimited.
I made a test against a solution based on collections.deque
:
class MutableIteratorDeque():
__slots__=('_iterator')
def __init__(self,*iterators):
s=len(iterators)
if s==0:
self._iterator=deque(()) #empty iterator
elif s==1:
self._iterator =deque((iter(iterators[0]),))
else:
self._iterator=deque((iter(i.__iter__()) for i in iterators))
def __next__(self):
while 1:
try:
yield next(self._iterator[0])
except StopIteration:
try:
self._iterator.popleft()
except IndexError:
raise StopIteration
def __iter__(self):
while 1:
try:
yield next(self._iterator[0])
except StopIteration:
self._iterator.popleft()
except IndexError:
break
def append(self,iterator):
self._iterator.append(iter(iterator))
def appendleft(self,iterator):
self._iterator.appendleft(iter(iterator))
Here my testing functions:
item_number=10000000
deeplist=[]
sub=deeplist
for i in range(item_number):
sub.append([1])
sub=sub[-1]
flatlist=list(range(item_number))
def iter1():
global deeplist
my_iterator=MutableIterator(deeplist)
for i in my_iterator:
if type(i) is list:
my_iterator.appendleft(iter(i))
def iter1b():
global flatlist
my_iterator=MutableIterator(flatlist)
for i in my_iterator:
if type(i) is list:
my_iterator.appendleft(iter(i))
def iter2():
global deeplist
my_iterator=MutableIteratorDeque(deeplist)
for i in my_iterator:
if type(i) is list:
my_iterator.appendleft(iter(i))
def iter2b():
global flatlist
my_iterator=MutableIteratorDeque(flatlist)
for i in my_iterator:
if type(i) is list:
my_iterator.appendleft(iter(i))
print('Used size of list: %i'%item_number)
print('MutableIterator via chain() deeplist: %fs'%timeit.timeit(iter1,number=1))
print('MutableIterator via deque() deeplist: %fs'%timeit.timeit(iter2,number=1))
print('MutableIterator via chain() flatlist: %fs'%timeit.timeit(iter1b,number=1))
print('MutableIterator via deque() flatlist: %fs'%timeit.timeit(iter2b,number=1))
I got following results based on Python 3.9 (64Bit):
Used size of list: 10000000
MutableIterator via chain() deeplist: 4.338570s
MutableIterator via deque() deeplist: 4.664340s
MutableIterator via chain() flatlist: 0.802791s
MutableIterator via deque() flatlist: 0.912932s
This means yes for sure the iteration time increases if we have deeper structures. Here itertools.chain
behaves a bit faster as a solution based on collections.deque
even that the iterator must be internally consumed from time to time to avoid recursion erros.
But we can also say the difference in between the two solutions is not really large.
After the input from @Kelly Bundy we can see that there are nested structures where the creation of the chain()
-objects seams to be to "costly", so overall it might be recommend to use the deque
solution:
deeplist = []
for i in range(10000):
deeplist = [deeplist]
deeplist += [0] * 10000
print('MutableIterator via chain() deeplist: %fs'%timeit.timeit(iter1,number=1))
print('MutableIterator via deque() deeplist: %fs'%timeit.timeit(iter2,number=1))
Result:
MutableIterator via chain() deeplist: 0.907174s
MutableIterator via deque() deeplist: 0.004194s