I have the following code:
# Sample input data. Bound to `xactions` because that is the name the
# groupby statement further down reads; as a bare expression the frame was
# built and immediately discarded.
# NOTE(review): presumably this is a sample of the real `xactions` table —
# confirm against the actual data source.
xactions = pd.DataFrame({
    'user_wid': {0: 3305613, 1: 57, 2: 80, 3: 31, 4: 38, 5: 12, 6: 35, 7: 25, 8: 42, 9: 16},
    'user_name': {0: 'Ter', 1: 'Am', 2: 'Wi', 3: 'Ma', 4: 'St', 5: 'Ju', 6: 'De', 7: 'Ri', 8: 'Ab', 9: 'Ti'},
    'user_age': {0: 41, 1: 34, 2: 45, 3: 47, 4: 70, 5: 64, 6: 64, 7: 63, 8: 32, 9: 24},
    'user_gender': {0: 'Male', 1: 'Female', 2: 'Male', 3: 'Male', 4: 'Male', 5: 'Female', 6: 'Female', 7: 'Female', 8: 'Female', 9: 'Female'},
    'sale_date': {0: '2018-05-15', 1: '2020-02-28', 2: '2020-04-02', 3: '2020-05-09', 4: '2020-11-29', 5: '2020-12-14', 6: '2020-04-21', 7: '2020-06-15', 8: '2020-07-03', 9: '2020-08-10'},
    'days_since_first_visit': {0: 426, 1: 0, 2: 0, 3: 8, 4: 126, 5: 283, 6: 0, 7: 189, 8: 158, 9: 270},
    'visit': {0: 4, 1: 1, 2: 1, 3: 2, 4: 4, 5: 3, 6: 1, 7: 2, 8: 4, 9: 2},
    'num_user_visits': {0: 4, 1: 2, 2: 1, 3: 2, 4: 10, 5: 7, 6: 1, 7: 4, 8: 4, 9: 2},
    'product': {0: 13, 1: 2, 2: 2, 3: 2, 4: 5, 5: 5, 6: 1, 7: 8, 8: 5, 9: 4},
    'sale_price': {0: 10.0, 1: 0.0, 2: 41.3, 3: 41.3, 4: 49.95, 5: 74.95, 6: 49.95, 7: 5.0, 8: 0.0, 9: 0.0},
    'whether_member': {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0, 9: 0},
})
def f(x):
    """Collapse the rows of one group into a single summary record.

    Written to be passed to ``DataFrame.groupby(...).apply``.

    Args:
        x: DataFrame with the rows of one group. It must contain the columns
            ``user_name``, ``user_age``, ``user_gender``, ``sale_date``,
            ``days_since_first_visit``, ``visit``, ``num_user_visits``,
            ``product``, ``sale_price`` and ``whether_member``.

    Returns:
        pd.Series keyed by summary field name, in the order the fields are
        listed below.

    Raises:
        KeyError: If any of the required columns is missing from ``x``.
    """
    # Both values feed two output fields each; compute them once per group
    # instead of re-scanning the columns.
    total_sales = x['sale_price'].sum()
    max_visit = x['visit'].max()

    return pd.Series({
        'user_name': x['user_name'].max(),
        'user_age': x['user_age'].max(),
        'user_gender': x['user_gender'].max(),
        # Plain max of the raw column values; for ISO 'YYYY-MM-DD' strings
        # this coincides with the latest date.
        'last_visit_date': x['sale_date'].max(),
        'days_since_first_visit': x['days_since_first_visit'].max(),
        'num_visits_window': max_visit,
        'num_visits_total': x['num_user_visits'].max(),
        'products_used': x['product'].max(),
        'user_total_sales': total_sales.round(2),
        # Divide the unrounded total so rounding happens only once.
        'avg_spend_visit': (total_sales / max_visit).round(2),
        'membership': x['whether_member'].max(),
    })
# Per-user summary of `xactions`. Produces the same columns, in the same
# order, as `xactions.groupby('user_wid').apply(f).reset_index()`, but uses
# pandas' built-in group reductions instead of calling a Python function and
# building a Series for every user, which is what made the apply version slow.
# Side benefit: columns keep their native dtypes rather than becoming object.
users = xactions.groupby('user_wid').agg(
    user_name=('user_name', 'max'),
    user_age=('user_age', 'max'),
    user_gender=('user_gender', 'max'),
    last_visit_date=('sale_date', 'max'),
    days_since_first_visit=('days_since_first_visit', 'max'),
    num_visits_window=('visit', 'max'),
    num_visits_total=('num_user_visits', 'max'),
    products_used=('product', 'max'),
    user_total_sales=('sale_price', 'sum'),
    membership=('whether_member', 'max'),
)
# Derive the average from the still-unrounded total, then round each column
# once, matching the per-group arithmetic of `f`.
avg_spend_visit = (users['user_total_sales'] / users['num_visits_window']).round(2)
users.insert(users.columns.get_loc('membership'), 'avg_spend_visit', avg_spend_visit)
users['user_total_sales'] = users['user_total_sales'].round(2)
users = users.reset_index()
It takes too long to execute, and I would like to optimize the function above. Any suggestions would be appreciated.
Thanks in advance.