0

I have a python script that organizes data from site regarding NHL players. The values are being placed into a dataframe. I also have built a function that takes players names and team name to get the total salary of the players line up. I want to pass the players names (F1, F2, F3) from the dataframe to the function (names) and store the result in my excel sheet (totalSalary).

I've tried iloc function to pass into function and I'm getting confused.

from bs4 import BeautifulSoup
import requests
import pandas as pd
import colorama
import crayons
import datetime
import xlsxwriter
import nhl_player_salary as nps

def playerProductionData():

#Getting today's date
#today = str(datetime.date.today())
today = datetime.date.today().strftime("%m-%#d")
today = str(today).replace("-","/")  
#print (today)
#Make it work on Windows machines

colorama.init()

# parameters for pandas display
def start():
    options = {
        'display': {
            'max_columns': None,
            'max_colwidth': 200,
            'expand_frame_repr': False,  # Don't wrap to multiple pages
            'max_rows': 20,
            'max_seq_items': 50,         # Max length of printed sequence
            'precision': 4,
            'show_dimensions': False,
            'colheader_justify': 'left'
        },
        'mode': {
            'chained_assignment': None   # Controls SettingWithCopyWarning
        }
    }



    for category, option in options.items():
        for op, value in option.items():
            pd.set_option(f'{category}.{op}', value)  # Python 3.6+

if __name__ == '__main__':
    start()
    del start  # Clean up namespace in the interpreter
#Set Agent Header to scrape data
headers = {"User-Agent":'Mozilla/5.0 (Windows NT 6.3; WOW64) 
AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}

page_link = 'https://www.leftwinglock.com/line-production/index.php?type=3'

#https://leftwinglock.com/articles.php?id=3049

page_response = requests.get(page_link, headers=headers, allow_redirects=False, timeout=5)

# here, we fetch the content from the url, using the requests library
page_content = BeautifulSoup(page_response.content, "html.parser")

#column_headers = page_content.findAll('tr')[0]
column_headers = [th.getText() for th in page_content.findAll('tr')[0].findAll('th')]

data_rows = page_content.findAll('tr')[1:]
player_data = [[td.getText() for td in data_rows[i].findAll('td', limit=14)] for i in range(len(data_rows))] #PLAYER DATA 

#print (column_headers)
df = pd.DataFrame(player_data,columns=['Team', 'F1', 'F2', 'F3', 'GF', 'GA', 'GF%', 'SATF', 'SAT%', 'USATF', 'USAT%', 'SH%', 'SV%', 'SHSV%'])
#initilize total salary
df['TotalSalary'] = 0

#nps.getPlayerSalary(player_data.teamAbbrv)
#df['TotalSalary'] = nps.getPlayerSalary(df.iloc[:,0], ["ARVIDSSON","JOHANSEN", "FORSBERG"])

#print (df)

convert_fill(df)
df['SATF'] = df['SATF'].astype(int)
df['GF'] = df['GF'].astype(int)

#Create a Pandas Excel writer using XlsxWriter as the engine.
writer = pd.ExcelWriter('player_line_production_data.xlsx', engine='xlsxwriter')

# Convert the dataframe to an XlsxWriter Excel object.
df.to_excel(writer, sheet_name='Player Line Production Data')

# Close the Pandas Excel writer and output the Excel file.
writer.save()       

return 'Run Successful'

def convert_fill(df):
return df.stack().apply(pd.to_numeric, errors='ignore').fillna(0).unstack()


 print(playerProductionData())

import pandas as pd
from pandas import DataFrame

def getPlayerSalary(teamAbbrv, names):
#Get Most Recent Draft Kings Salary List
DKSalary = (pd.read_csv('DKSalaries.csv'))
DKSalary_DF = DataFrame(DKSalary, columns=['Position', 'Name', 'Salary', 'AvgPointsPerGame', 'TeamAbbrev'])
i = 0

def getDataFrameForNameTeam(teamAbbrv, name):

    filterName = DKSalary_DF[DKSalary_DF['Name'].str.contains(name.title())]
    filterName = filterName[filterName['TeamAbbrev'].str.contains(teamAbbrv)]
    return filterName

nameDF = getDataFrameForNameTeam(teamAbbrv, names[0])

while i < len(names) - 1:
    newframe = getDataFrameForNameTeam(teamAbbrv, names[i + 1])
    nameDF = pd.concat([nameDF, newframe])
    i += 1


return  nameDF['Salary'].sum()

print (getPlayerSalary ('NSH', ["ARVIDSSON","JOHANSEN", "FORSBERG"])) 
user3066155
  • 157
  • 2
  • 2
  • 15
  • Please construct a [mcve]. If you need help with this, read [How to make good reproducible pandas examples](https://stackoverflow.com/questions/20109391/how-to-make-good-reproducible-pandas-examples). – jpp Feb 02 '19 at 23:10

1 Answers1

0

Once you have your DataFrame built, you can query it for individual players to get their salary by something like this:

df['Name' == 'Bob']['Salary'].sum()

The issue you may realize is that there is no guarantee that the names are unique. They are not indexed in your example... So the above will get any players named "Bob" on any team and add them.

It appears from your post that you are looking for team totals, just use pandas 'groupie()' function to sum the team:

df.groupby('Team')['Salary'].sum()

The above groups the df by team, then sums the 'Salary' column per group.

AirSquid
  • 10,214
  • 2
  • 7
  • 31
  • I want to pass Team, F1, F2, and F3 to getPlayerSalary from playerProductionData and then store result in the row – user3066155 Feb 03 '19 at 01:02
  • I want apply function getPlayerSalary (Team, Names) the entire dataframe and store result in a column called TotalSalary. – user3066155 Feb 03 '19 at 16:58