The main problem with your solution was that it inserted elements 2 * 383656 times into an existing list.
On every insertion, all the elements after the insertion point had to be shifted.
Thus it's faster to create a new list.
If for any reason you want cleanData
to remain the same list object, just holding the new data (perhaps because another function or object has a reference to it and should see the changed data), then write
cleanData[:] = blablabla
instead of
cleanData = blablabla
I wrote the following two solutions (the second, faster one only after the answer got accepted):
import functools
import operator
# Build a brand-new flat list instead of inserting into the existing one.
# zip() receives the *same* iter(cleanData) object three times, so each tuple
# it yields holds three consecutive cleanData items followed by one item from
# listOfX and one from listOfY.
cleanData = functools.reduce(
    # iconcat is in-place concatenation (acc += chunk): every chunk is appended
    # to the single accumulator list that starts out as [].
    operator.iconcat,
    (list(v) for v in zip(*([iter(cleanData)] * 3), listOfX, listOfY)),
    [])
and
import itertools
# zip() yields 5-item tuples (three consecutive cleanData items, then one item
# each from listOfX and listOfY); chain.from_iterable flattens them one after
# another and list() materialises the flattened stream.
cleanData = list(itertools.chain.from_iterable(
    (v for v in zip(*([iter(cleanData)] * 3), listOfX, listOfY)),
))
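In order to understand the zip(*([iter(cleanData)] * 3), listOfX, listOfY)
construct, you might look at the question "What is meaning of [iter(list)]*2 in python?". In short: [iter(cleanData)] * 3 is a list holding the same iterator object three times, so zip pulls three consecutive items of cleanData for every tuple it builds.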
A potential downside of my first solution (depending on the context): using functools.reduce
and operator.iconcat
always builds a complete list; it cannot produce a lazy iterator.
The second solution also returns a list. If you want a lazy iterator instead, just remove list(
and one trailing )
and the result will be an itertools.chain iterator that yields the items on demand.
The second solution is about 2x faster than the first one.
Then I wrote some code to compare the performance and the results of the two solutions from the other answers with mine.
The difference is not very big (2.5x), but my second solution seems to be a bit faster than @Błotosmętek's first solution and Alain T.'s solution.
from contextlib import contextmanager
import functools
import itertools
import operator
import time
@contextmanager
def measuretime(comment):
    """Time the enclosed ``with`` block and print the result.

    Prints a 76-character ``=`` rule on entry. On exit it prints
    ``"<comment>: <seconds>s"`` followed by a 76-character ``-`` rule and an
    empty line.

    Args:
        comment: Label printed in front of the elapsed time; it is also the
            value bound by ``with measuretime(...) as name``.

    Yields:
        The ``comment`` that was passed in.
    """
    print("=" * 76)
    # perf_counter() is monotonic and high-resolution, so it is the right
    # clock for measuring short intervals; time.time() can jump when the
    # system clock is adjusted.
    t0 = time.perf_counter()
    try:
        yield comment
    finally:
        # Report the elapsed time even when the timed body raises, so the
        # header printed above is never left without its matching footer.
        print("%s: %5.3fs" % (comment, time.perf_counter() - t0))
        print("-" * 76 + "\n")
# Number of records in the benchmark: listOfX and listOfY get N items each,
# the flat cleanData list gets 3 * N items.
N = 383656
# NOTE(review): this module-level t0 is not read anywhere in the code shown
# here; measuretime() keeps its own local start time.
t0 = time.time()
with measuretime("create listOfX"):
    listOfX = list(range(N))
with measuretime("create listOfY"):
    listOfY = list(range(1000000, 1000000 + N))
print("listOfX", len(listOfX), listOfX[:10])
print("listOfY", len(listOfY), listOfY[:10])
# Build the flat source list: three strings per record, concatenated in place
# into one accumulator list via operator.iconcat.
with measuretime("create cleanData"):
    origCleanData = functools.reduce(
        operator.iconcat,
        (["2020-010-1T01:00:00.%06d" % i, "c%d" % i, "%d" %i] for i in range(N)),
        [])
print("cleanData", len(origCleanData), origCleanData[:12])
# Variant 1: functools.reduce + operator.iconcat.
# Work on a fresh copy so origCleanData stays untouched for the other runs.
cleanData = list(origCleanData)
with measuretime("funct.reduce operator icat + zip"):
    # Each zip() tuple (3 cleanData items + x + y) is turned into a list and
    # concatenated in place onto the accumulator that starts as [].
    newcd1 = functools.reduce(
        operator.iconcat,
        (list(v) for v in zip(*([iter(cleanData)] * 3), listOfX, listOfY)),
        [])
# newcd1 is kept as the reference result the later variants are compared with.
print("NEW", len(newcd1), newcd1[:3*10])
# Variant 2: itertools.chain.from_iterable over the same zip() construct.
# Start again from a fresh copy of the original data.
cleanData = list(origCleanData)
with measuretime("itertools.chain + zip"):
    # chain.from_iterable flattens the 5-item tuples; list() materialises them.
    cleanData = list(itertools.chain.from_iterable(
        (v for v in zip(*([iter(cleanData)] * 3), listOfX, listOfY)),
    ))
print("NEW", len(cleanData), cleanData[:3*10])
# Must produce exactly the same list as variant 1.
assert newcd1 == cleanData
# Variant 3 (labelled "blotosmetek"): explicit loop that appends to a new list.
# Start again from a fresh copy of the original data.
cleanData = list(origCleanData)
with measuretime("blotosmetek"):
    tmp = []
    # Only process as many records as all three inputs can supply.
    n = min(len(listOfX), len(listOfY), len(cleanData)//3)
    for i in range(n):
        # Copy the i-th group of three items, then add its x and y values.
        tmp.extend(cleanData[3*i : 3*i+3])
        tmp.append(listOfX[i])
        tmp.append(listOfY[i])
    cleanData = tmp
print("NEW", len(cleanData), cleanData[:3*10])
# Must produce exactly the same list as variant 1.
assert newcd1 == cleanData
# Variant 4 (labelled "alainT"): nested list comprehension.
# Start again from a fresh copy of the original data.
cleanData = list(origCleanData)
with measuretime("alainT"):
    # i steps through cleanData in strides of 3; for every stride the slice of
    # three items plus the matching x and y are emitted one by one.
    cleanData = [ v for i,x,y in zip(range(0,len(cleanData),3),listOfX,listOfY)
                    for v in (*cleanData[i:i+3],x,y) ]
print("NEW", len(cleanData), cleanData[:3*10])
# Must produce exactly the same list as variant 1.
assert newcd1 == cleanData
Output on my old PC looks like:
============================================================================
create listOfX: 0.013s
----------------------------------------------------------------------------
============================================================================
create listOfY: 0.013s
----------------------------------------------------------------------------
listOfX 383656 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
listOfY 383656 [1000000, 1000001, 1000002, 1000003, 1000004, 1000005, 1000006, 1000007, 1000008, 1000009]
============================================================================
create cleanData: 0.454s
----------------------------------------------------------------------------
cleanData 1150968 ['2020-010-1T01:00:00.000000', 'c0', '0', '2020-010-1T01:00:00.000001', 'c1', '1', '2020-010-1T01:00:00.000002', 'c2', '2', '2020-010-1T01:00:00.000003', 'c3', '3']
============================================================================
funct.reduce operator icat + zip: 0.240s
----------------------------------------------------------------------------
NEW 1918280 ['2020-010-1T01:00:00.000000', 'c0', '0', 0, 1000000, '2020-010-1T01:00:00.000001', 'c1', '1', 1, 1000001, '2020-010-1T01:00:00.000002', 'c2', '2', 2, 1000002, '2020-010-1T01:00:00.000003', 'c3', '3', 3, 1000003, '2020-010-1T01:00:00.000004', 'c4', '4', 4, 1000004, '2020-010-1T01:00:00.000005', 'c5', '5', 5, 1000005]
============================================================================
itertools.chain + zip: 0.109s
----------------------------------------------------------------------------
NEW 1918280 ['2020-010-1T01:00:00.000000', 'c0', '0', 0, 1000000, '2020-010-1T01:00:00.000001', 'c1', '1', 1, 1000001, '2020-010-1T01:00:00.000002', 'c2', '2', 2, 1000002, '2020-010-1T01:00:00.000003', 'c3', '3', 3, 1000003, '2020-010-1T01:00:00.000004', 'c4', '4', 4, 1000004, '2020-010-1T01:00:00.000005', 'c5', '5', 5, 1000005]
============================================================================
blotosmetek: 0.370s
----------------------------------------------------------------------------
NEW 1918280 ['2020-010-1T01:00:00.000000', 'c0', '0', 0, 1000000, '2020-010-1T01:00:00.000001', 'c1', '1', 1, 1000001, '2020-010-1T01:00:00.000002', 'c2', '2', 2, 1000002, '2020-010-1T01:00:00.000003', 'c3', '3', 3, 1000003, '2020-010-1T01:00:00.000004', 'c4', '4', 4, 1000004, '2020-010-1T01:00:00.000005', 'c5', '5', 5, 1000005]
============================================================================
alainT: 0.258s
----------------------------------------------------------------------------
NEW 1918280 ['2020-010-1T01:00:00.000000', 'c0', '0', 0, 1000000, '2020-010-1T01:00:00.000001', 'c1', '1', 1, 1000001, '2020-010-1T01:00:00.000002', 'c2', '2', 2, 1000002, '2020-010-1T01:00:00.000003', 'c3', '3', 3, 1000003, '2020-010-1T01:00:00.000004', 'c4', '4', 4, 1000004, '2020-010-1T01:00:00.000005', 'c5', '5', 5, 1000005]