This code is meant to find the average promotion value in a given month in a two-year period. In total there are about 11,000 rows in the data set that need to be looked over. The code has been running for 5 minutes and the results still haven't been posted. I'm a still very novice in my coding career so any tips onto better optimize code for faster completion times would be appreciated!
import pandas as pd
df = pd.read_csv(r'C:\Users\james.rush\df_LG.csv')
df.head()
Promos = []
Avg_Promo = []
Dates = []
#This function is used to determine the Average Promotion during any given month/year
def Promo_Avg(Date):
for x in df['Date']: #For all dates in dataframe
Promo_Value = df.loc[df['Date'] == Date, 'Promo'] #Locate the corresponding promo given the provided date
Promos.append(Promo_Value) #Add that Promo to the list of Promos for that month, will need list length later
Average_Promotion = sum(Promos)/len(Promos) #Average Promotion during the given month
if Average_Promotion not in Avg_Promo: #Prevents Duplicates
Avg.append(Average_Promotion)
if Date not in Dates: #If the Current Date being Checked is not in list, add to list. This will prevent Duplicates
Dates.append(Dates)
Function_Dates = [
'January2020',
'Febuary2020',
'March2020'
]
for x in Function_Dates:
Promo_Avg(x)