I have the following code:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import pandas as pd
import requests
from requests import get
# Module-level accumulators filled by the scraping loops below; each list
# receives one entry per matching table cell.
date = []
tourney_round = []
result = []
winner_odds = []
loser_odds = []
surface = []
# Was a bare name (NameError on execution); it must be a list because the
# scraping code appends to it.
players_and_tourney = []
# Download the player's page and parse it.
# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() stops us from silently parsing an HTTP error page.
response = get(
    'http://www.tennisexplorer.com/player/humbert-e2553/?annual=all',
    timeout=30,
)
response.raise_for_status()
page_html = BeautifulSoup(response.text, 'html.parser')

# All <div> elements with this id (presumably the 2018 match table -- the
# year is embedded in the id string; confirm against the page markup).
results2018_containers = page_html.find_all('div', id='matches-2018-1-data')
# Collect the text of every <td class="first time"> cell.
# Use the loop variable instead of always indexing [0], so that several
# containers do not re-append the first container's cells each time.
for container in results2018_containers:
    played_date_2018 = container.find_all('td', class_='first time')
    date.extend(cell.text for cell in played_date_2018)

# Append the year to every collected value (presumably the cells hold
# day/month only -- confirm against the page).
string_2018 = '2018'
date = [day + string_2018 for day in date]
# Collect the text of every <td class="round"> cell.
# Iterate over the actual container rather than results2018_containers[0]
# so multiple containers are not collapsed into repeats of the first one.
for container in results2018_containers:
    rounds_2018 = container.find_all('td', class_='round')
    tourney_round.extend(cell.text for cell in rounds_2018)
# Collect the text of every <td class="tl"> cell.
# Iterate over the actual container rather than results2018_containers[0]
# so multiple containers are not collapsed into repeats of the first one.
for container in results2018_containers:
    results_2018 = container.find_all('td', class_='tl')
    result.extend(cell.text for cell in results_2018)
# Collect the `title` attribute of the first <span> inside every
# <td class="s-color"> cell.
# Iterate over the actual container rather than results2018_containers[0]
# so multiple containers are not collapsed into repeats of the first one.
for container in results2018_containers:
    surfaces_2018 = container.find_all('td', class_='s-color')
    # NOTE: a cell without a <span> (or a span without `title`) still raises
    # here, as before; skipping it would misalign this list with the others.
    surface.extend(cell.find('span')['title'] for cell in surfaces_2018)
# Collect the text of every <td class="course"> cell, alternating between
# the two odds lists: even positions go to winner_odds, odd positions to
# loser_odds.
# Iterate over the actual container rather than results2018_containers[0]
# so multiple containers are not collapsed into repeats of the first one.
for container in results2018_containers:
    odds_2018 = container.find_all('td', class_='course')

    winner_odds_2018 = odds_2018[::2]
    winner_odds.extend(cell.text for cell in winner_odds_2018)

    loser_odds_2018 = odds_2018[1::2]
    loser_odds.extend(cell.text for cell in loser_odds_2018)
# Collect the text of every <td class="t-name"> cell, in document order.
# Later code splits this list into header entries (those starting with
# '\xa0') and the entries that follow each header.
# Iterate over the actual container rather than results2018_containers[0]
# so multiple containers are not collapsed into repeats of the first one.
for container in results2018_containers:
    namesandtourney_2018 = container.find_all('td', class_='t-name')
    players_and_tourney.extend(cell.text for cell in namesandtourney_2018)
from itertools import chain, groupby, repeat
# Short alias: flattens an iterable of iterables into one iterator.
chainer = chain.from_iterable


def condition(x: str) -> bool:
    """Return True when *x* begins with a non-breaking space (U+00A0).

    Used as the ``groupby`` key that separates header entries from the
    entries listed under them.
    """
    return x.startswith('\xa0')
# Walk the consecutive runs of players_and_tourney once:
#   * a run for which condition() is true contributes its first item to
#     `headers`;
#   * any other run is stored whole, as a list, in `elements`.
elements = []
headers = []
for is_header, run in groupby(players_and_tourney, key=condition):
    if is_header:
        headers.append(next(run))
    else:
        elements.append(list(run))
# chain list of lists, and use repeat for headers
# Flatten the grouped entries back into one flat column ...
players_column = [entry for group in elements for entry in group]
# ... and repeat each header once per entry of the group paired with it.
tournament_column = [
    header
    for header, group in zip(headers, elements)
    for _ in group
]

# Assemble the frame; every column must have the same length or pandas
# raises ValueError.
initial_df_2018 = pd.DataFrame({
    'Date': date,
    'Surface': surface,
    'Players': players_column,
    'Tournament': tournament_column,
    'Round': tourney_round,
    'Result': result,
    'Winner Odds': winner_odds,
    'Loser Odds': loser_odds,
})
# Split "A - B" in the Players column into two columns on the first ' - '.
# The original statement was broken across two lines without a continuation
# (SyntaxError) and unpacked the `.str` accessor, which current pandas no
# longer allows; `n` must also be passed by keyword.
# reindex() guarantees both columns exist even when no row contains ' - '
# (or the frame is empty); missing parts come out as NaN/None.
players_split = (
    initial_df_2018['Players']
    .str.split(' - ', n=1, expand=True)
    .reindex(columns=[0, 1])
)
initial_df_2018['Winner'] = players_split[0]
initial_df_2018['Loser'] = players_split[1]
del initial_df_2018['Players']

# Keep and order the final columns.
# NOTE(review): 'Round' is not in this list, so it is dropped here exactly
# as in the original -- confirm that this is intended.
initial_df_2018 = initial_df_2018[
    ['Date', 'Surface', 'Tournament', 'Winner', 'Loser', 'Result',
     'Winner Odds', 'Loser Odds']
]
I want to create a loop that runs the code for every year starting from 2005. So basically, I want to run a loop that replaces 2018 throughout the code with each year between 2005 and 2018. If possible, the code would run first for the year 2018, then 2017, and so on until 2005.
Edit: I added the code that I used to pull data for the year 2018, but I want to have a loop that will pull data for all the years that can be found on the page.