I have a python script that organizes data from site regarding NHL players. The values are being placed into a dataframe. I also have built a function that takes players names and team name to get the total salary of the players line up. I want to pass the players names (F1, F2, F3) from the dataframe to the function (names) and store the result in my excel sheet (totalSalary).
I've tried iloc function to pass into function and I'm getting confused.
from bs4 import BeautifulSoup
import requests
import pandas as pd
import colorama
import crayons
import datetime
import xlsxwriter
import nhl_player_salary as nps
def playerProductionData():
#Getting today's date
#today = str(datetime.date.today())
today = datetime.date.today().strftime("%m-%#d")
today = str(today).replace("-","/")
#print (today)
#Make it work on Windows machines
colorama.init()
# parameters for pandas display
def start():
options = {
'display': {
'max_columns': None,
'max_colwidth': 200,
'expand_frame_repr': False, # Don't wrap to multiple pages
'max_rows': 20,
'max_seq_items': 50, # Max length of printed sequence
'precision': 4,
'show_dimensions': False,
'colheader_justify': 'left'
},
'mode': {
'chained_assignment': None # Controls SettingWithCopyWarning
}
}
for category, option in options.items():
for op, value in option.items():
pd.set_option(f'{category}.{op}', value) # Python 3.6+
if __name__ == '__main__':
start()
del start # Clean up namespace in the interpreter
#Set Agent Header to scrape data
headers = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64)
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
page_link = 'https://www.leftwinglock.com/line-production/index.php?type=3'
#https://leftwinglock.com/articles.php?id=3049
page_response = requests.get(page_link, headers=headers, allow_redirects=False, timeout=5)
# here, we fetch the content from the url, using the requests library
page_content = BeautifulSoup(page_response.content, "html.parser")
#column_headers = page_content.findAll('tr')[0]
column_headers = [th.getText() for th in page_content.findAll('tr')[0].findAll('th')]
data_rows = page_content.findAll('tr')[1:]
player_data = [[td.getText() for td in data_rows[i].findAll('td', limit=14)] for i in range(len(data_rows))] #PLAYER DATA
#print (column_headers)
df = pd.DataFrame(player_data,columns=['Team', 'F1', 'F2', 'F3', 'GF', 'GA', 'GF%', 'SATF', 'SAT%', 'USATF', 'USAT%', 'SH%', 'SV%', 'SHSV%'])
#initilize total salary
df['TotalSalary'] = 0
#nps.getPlayerSalary(player_data.teamAbbrv)
#df['TotalSalary'] = nps.getPlayerSalary(df.iloc[:,0], ["ARVIDSSON","JOHANSEN", "FORSBERG"])
#print (df)
convert_fill(df)
df['SATF'] = df['SATF'].astype(int)
df['GF'] = df['GF'].astype(int)
#Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('player_line_production_data.xlsx', engine='xlsxwriter')
# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Player Line Production Data')
# Close the Pandas Excel writer and output the Excel file.
writer.save()
return 'Run Successful'
def convert_fill(df):
return df.stack().apply(pd.to_numeric, errors='ignore').fillna(0).unstack()
print(playerProductionData())
import pandas as pd
from pandas import DataFrame
def getPlayerSalary(teamAbbrv, names):
#Get Most Recent Draft Kings Salary List
DKSalary = (pd.read_csv('DKSalaries.csv'))
DKSalary_DF = DataFrame(DKSalary, columns=['Position', 'Name', 'Salary', 'AvgPointsPerGame', 'TeamAbbrev'])
i = 0
def getDataFrameForNameTeam(teamAbbrv, name):
filterName = DKSalary_DF[DKSalary_DF['Name'].str.contains(name.title())]
filterName = filterName[filterName['TeamAbbrev'].str.contains(teamAbbrv)]
return filterName
nameDF = getDataFrameForNameTeam(teamAbbrv, names[0])
while i < len(names) - 1:
newframe = getDataFrameForNameTeam(teamAbbrv, names[i + 1])
nameDF = pd.concat([nameDF, newframe])
i += 1
return nameDF['Salary'].sum()
print (getPlayerSalary ('NSH', ["ARVIDSSON","JOHANSEN", "FORSBERG"]))