a=[{'time':3},{'time':4},{'time':5}]
b = []
cumsum = 0
for e in a[::-1]:
cumsum += e['time']
b.insert(0, {'exp':e['time'], 'cumsum':cumsum})
print(b)
Output:
[{'exp': 3, 'cumsum': 12}, {'exp': 4, 'cumsum': 9}, {'exp': 5, 'cumsum': 5}]
So it turns out that inserting at the start of a list is
slow (O(n)). Instead, try a
deque
(O(1)):
from collections import deque
a=[{'time':3},{'time':4},{'time':5}]
b = deque()
cumsum = 0
for e in a[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
print(b)
print(list(b))
Output:
deque([{'cumsum': 12, 'exp': 3}, {'cumsum': 9, 'exp': 4}, {'cumsum': 5, 'exp': 5}])
[{'cumsum': 12, 'exp': 3}, {'cumsum': 9, 'exp': 4}, {'cumsum': 5, 'exp': 5}]
Here is a script to test the speed of each of the approaches ITT, as well as a chart with timing results:

from collections import deque
from copy import deepcopy
import numpy as np
import pandas as pd
from random import randint
from time import time
def Nehal_pandas(l):
df = pd.DataFrame(l)
df['cumsum'] = df.ix[::-1, 'time'].cumsum()[::-1]
df.columns = ['exp', 'cumsum']
return df.to_json(orient='records')
def Merlin_pandas(l):
df = pd.DataFrame(l).rename(columns={'time':'exp'})
df["cumsum"] = df['exp'][::-1].cumsum()
return df.to_dict(orient='records')
def RahulKP_numpy(l):
cumsum_list = np.cumsum([i['time'] for i in l][::-1])[::-1]
for i,j in zip(l,cumsum_list):
i.update({'cumsum':j})
def Divakar_pandas(l):
df = pd.DataFrame(l)
df.columns = ['exp']
df['cumsum'] = (df[::-1].cumsum())[::-1]
return df.T.to_dict().values()
def cb_insert_0(l):
b = []
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.insert(0, {'exp':e['time'], 'cumsum':cumsum})
return b
def cb_deque(l):
b = deque()
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
b = list(b)
return b
def cb_deque_noconvert(l):
b = deque()
cumsum = 0
for e in l[::-1]:
cumsum += e['time']
b.appendleft({'exp':e['time'], 'cumsum':cumsum})
return b
def hpaulj_gen(l, var='value'):
cum=0
for i in l:
j=i[var]
cum += j
yield {var:j, 'sum':cum}
def hpaulj_inplace(l, var='time'):
cum = 0
for i in l:
cum += i[var]
i['sum'] = cum
def test(number_of_lists, min_list_length, max_list_length):
test_lists = []
for _ in range(number_of_lists):
test_list = []
number_of_dicts = randint(min_list_length,max_list_length)
for __ in range(number_of_dicts):
random_value = randint(0,50)
test_list.append({'time':random_value})
test_lists.append(test_list)
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = list(hpaulj_gen(l[::-1], 'time'))[::-1]
elapsed_time = time() - start_time
print('hpaulj generator:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
hpaulj_inplace(l[::-1])
elapsed_time = time() - start_time
print('hpaulj in place:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_insert_0(l)
elapsed_time = time() - start_time
print('craig insert list at 0:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_deque(l)
elapsed_time = time() - start_time
print('craig deque:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = cb_deque_noconvert(l)
elapsed_time = time() - start_time
print('craig deque no convert:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
RahulKP_numpy(l) # l changed in place
elapsed_time = time() - start_time
print('Rahul K P numpy:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Divakar_pandas(l)
elapsed_time = time() - start_time
print('Divakar pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Nehal_pandas(l)
elapsed_time = time() - start_time
print('Nehal pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')
lists = deepcopy(test_lists)
start_time = time()
for l in lists:
res = Merlin_pandas(l)
elapsed_time = time() - start_time
print('Merlin pandas:'.ljust(25), '%.2f' % (number_of_lists / elapsed_time), 'lists per second')