I am trying to iterate over a series of XPaths
on www.oddsportal.com.
I tested the code below, and clicking a single element with element.click() works:
from selenium import webdriver
from selenium.webdriver.common.by import By

# Open the soccer matches page and click one link located by an absolute
# XPath, to confirm that element lookup and click() work.
browser = webdriver.Chrome()
browser.get("https://www.oddsportal.com/matches/soccer/")
# find_element(By.XPATH, ...) replaces find_element_by_xpath, which was
# deprecated in Selenium 4.0 and removed in 4.3.
# NOTE(review): this XPath goes through div[4], whereas the XPaths listed
# for iteration go through div[3] - confirm which one is intended.
element = browser.find_element(
    By.XPATH,
    "/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[4]/div/div/span/a[3]",
)
element.click()
The XPaths of the links that I want
to iterate over are:
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[2]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[3]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[4]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[5]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[6]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[7]
/html/body/div[1]/div/div[2]/div[6]/div[1]/div/div[1]/div[2]/div[1]/div[3]/div/div/span/a[8]
I have code that scrapes any given set of URLs,
shown below:
import pandas as pd
from selenium import webdriver
from datetime import datetime
from bs4 import BeautifulSoup as bs
# Single Selenium driver shared by the whole script; parse_data() loads
# every page through it.
browser = webdriver.Chrome()

# Pages to scrape. This is a set, so iteration order is not guaranteed.
urls = {"https://www.oddsportal.com/matches/soccer/"}
class GameData:
    """Column-oriented container for the data scraped from one page.

    Every attribute is a list holding one entry per match. The caller
    passes the instance's ``__dict__`` to ``pandas.DataFrame``, so each
    attribute becomes a column of the resulting frame.
    """

    def __init__(self):
        # Country label recorded for each match row, in page order.
        self.country = []
def parse_data(url):
    """Load *url* in the shared browser and record the country of each match.

    The page is first checked with BeautifulSoup for the expected layout:
    ``div#wrap`` > ``div#col-content`` > ``table.table-main`` >
    ``th.first2.tl`` containing at least one link. The text of the first
    link seeds the current country. The first table on the page is then
    read with pandas and walked row by row, looking only at the first
    column:

    * non-string cells are skipped;
    * a string without ``':'`` is treated as a section header, and the text
      before ``'»'`` becomes the current country;
    * any other string (one containing ``':'``) counts as a match row, and
      the current country is appended for it.

    Args:
        url: Address of the page to scrape.

    Returns:
        A ``GameData`` whose ``country`` list has one entry per match row,
        or ``None`` when the expected layout is not present on the page.
    """
    # Local import so the file-level import block does not need to change.
    from io import StringIO

    browser.get(url)
    # Read the source once so that BeautifulSoup and pandas both work on the
    # same snapshot of the page.
    html = browser.page_source

    soup = bs(html, "lxml")
    wrap = soup.find('div', {'id': 'wrap'})
    column = wrap.find('div', {'id': 'col-content'}) if wrap is not None else None
    # The original call also passed {'id': 'table-matches'} as a third
    # positional argument. BeautifulSoup's find() takes that slot as
    # `recursive`, so it never filtered on the id; it is dropped here and the
    # lookup result is unchanged.
    table = (
        column.find('table', {'class': 'table-main'})
        if column is not None
        else None
    )
    header = table.find('th', {'class': 'first2 tl'}) if table is not None else None
    if header is None:
        return None

    first_link = header.find('a')
    if first_link is None:
        # No link in the header cell: treat it like any other unrecognised
        # layout instead of failing with an IndexError.
        return None
    country = first_link.text

    # Parsed only after the layout checks, so a page without tables yields
    # None rather than a ValueError. StringIO avoids pandas' deprecated
    # literal-HTML-string input.
    df = pd.read_html(StringIO(html), header=0)[0]

    game_data = GameData()
    for row in df.itertuples():
        # row[0] is the DataFrame index; row[1] is the first table column.
        label = row[1]
        if not isinstance(label, str):
            continue
        if ':' not in label:
            country = label.split('»')[0]
            continue
        game_data.country.append(country)
    return game_data
if __name__ == '__main__':
    # Scrape every configured URL and stack the per-page frames into
    # `results`, which stays None when no page yielded any data.
    frames = []
    try:
        for url in urls:
            game_data = parse_data(url)
            if game_data is None:
                continue
            # Each GameData attribute becomes one DataFrame column.
            frames.append(pd.DataFrame(game_data.__dict__))
    finally:
        # Always shut the browser down, even if scraping fails part-way.
        browser.quit()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    results = pd.concat(frames, ignore_index=True) if frames else None
How can I integrate the XPath iteration
into this code?
I tried the solutions discussed here, but I am getting nowhere; I am probably still early in the learning curve.