Here's an answer comparing some of the possible methods with 2 differents datasets, one will consist of many little arrays, the other one will be few large arrays:
import timeit
import random
from itertools import chain
def f1(a, b):
return list(chain.from_iterable(zip(a, b)))
def f2(a, b):
return list(sum(zip(a, b), ()))
def f3(a, b):
result = []
for (e1, e2) in zip(a, b):
result += [e1, e2]
return result
def f4(a, b):
result = []
len_result = min(len(a), len(b))
result = []
i = 0
while i < len_result:
result.append(a[i])
result.append(b[i])
i += 1
return result
# Small benchmark
N = 5000000
a_small = ['a', 'b', 'c', 'd']
b_small = ['e', 'f', 'g', 'h']
benchmark1 = [
timeit.timeit(
'f1(a_small, b_small)', setup='from __main__ import f1, a_small,b_small', number=N),
timeit.timeit(
'f2(a_small, b_small)', setup='from __main__ import f2, a_small,b_small', number=N),
timeit.timeit(
'f3(a_small, b_small)', setup='from __main__ import f3, a_small,b_small', number=N),
timeit.timeit(
'f4(a_small, b_small)', setup='from __main__ import f4, a_small,b_small', number=N)
]
for index, value in enumerate(benchmark1):
print " - Small sample with {0} elements -> f{1}={2}".format(len(a_small), index + 1, value)
# Large benchmark
N = 5000
K = 100000
P = 1000
a_large = random.sample(range(K), P)
b_large = random.sample(range(K), P)
benchmark2 = [
timeit.timeit(
'f1(a_large, b_large)', setup='from __main__ import f1, a_large,b_large', number=N),
timeit.timeit(
'f2(a_large, b_large)', setup='from __main__ import f2, a_large,b_large', number=N),
timeit.timeit(
'f3(a_large, b_large)', setup='from __main__ import f3, a_large,b_large', number=N),
timeit.timeit(
'f4(a_large, b_large)', setup='from __main__ import f4, a_large,b_large', number=N)
]
for index, value in enumerate(benchmark2):
print " - Large sample with {0} elements -> f{1}={2}".format(K, index + 1, value)
- Small sample with 4 elements -> f1=7.50175959666
- Small sample with 4 elements -> f2=5.52386084127
- Small sample with 4 elements -> f3=7.12457549607
- Small sample with 4 elements -> f4=7.24530968309
- Large sample with 100000 elements -> f1=0.512278885906
- Large sample with 100000 elements -> f2=28.0679210232
- Large sample with 100000 elements -> f3=1.05977378475
- Large sample with 100000 elements -> f4=1.17144886156
Conclusion: It seems f2 function is the slightly faster method when N is big and the lists are litte. When the arrays are large and the number is little, f1 is the winner though.
Specs: Python2.7.11(64) , N=5000000 on a i-7 2.6Ghz