0

I am trying to scrape data from the website https://www.eex.com/en/market-data/power/futures#%7B%22snippetpicker%22%3A%2228%22%7D. With a little help I got as far as the code below, but I need the data for different dates (you can choose a specific date in the date widget on the left), and this is where I got stuck.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
import pandas as pd


from io import StringIO

# Start a Chrome session. Everything that follows runs inside try/finally so
# the browser is closed even when one of the waits below times out.
driver = webdriver.Chrome()
try:
    wait = WebDriverWait(driver, 20)

    driver.get(url='https://www.eex.com/en/market-data/power/futures')

    # Accept the cookie banner so it no longer covers the page.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "input[value='I accept all cookies.']"))).click()
    # Fixed pause kept from the original script; presumably it gives the page
    # time to settle after the banner closes.
    time.sleep(3)

    # Open the product dropdown and pick 'EEX German Power Futures'.
    wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn.dropdown-toggle.form.input-select div.filter-option-inner"))).click()
    wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='dropdown-menu show']//li/a[@class='dropdown-item']/span[contains(., 'EEX German Power Futures')]"))).click()

    # Grab the rendered table's HTML and let pandas parse it.
    table_data = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#baseloadwidget_pfpde > table.mv-quote"))).get_attribute("outerHTML")
    # pandas 2.1+ deprecates passing literal HTML; wrap it in a file-like object.
    df_list = pd.read_html(StringIO(table_data))
    if df_list:
        df = df_list[0]
        print(df)
    else:
        print("No data found on the table.")
finally:
    driver.quit()

I am trying to scrape the data published for different dates, but I can't seem to get it to work.

  • You have constructed this question based on my [answer](https://stackoverflow.com/a/76823591/7429447) to your previous [question](https://stackoverflow.com/q/76821928/7429447) without providing any credits. Can I have a feedback on my answer to your previous question please? – undetected Selenium Aug 03 '23 at 23:34

2 Answers2

1

This is a bit tricky because the date input in your case sits on the page as part of a widget that also exists in several invisible copies. You can wrap this code in a function and use it to choose dates.

# Insert this right after the line that selects the item from the dropdown:
# WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.XPATH, "//div[@class='dropdown-menu show']//li/a[@class='dropdown-item']/span[contains(., 'EEX German Power Futures')]"))).click()

from io import StringIO

from selenium.webdriver.common.keys import Keys

wdwait = WebDriverWait(driver, 10)
# One locator for the table, reused by every wait below so they cannot drift apart.
table_locator = (By.CSS_SELECTOR, "div#baseloadwidget_pfpde > table.mv-quote")

# Several matching inputs exist on the page; the wait returns only the visible ones.
date_inputs = wdwait.until(EC.visibility_of_any_elements_located((By.CSS_SELECTOR, "[id*=snippet] .mv-date-input input")))
date_input = date_inputs[0]

# Remember the current table text so the refresh can be detected afterwards.
table_element_text = wdwait.until(EC.visibility_of_element_located(table_locator)).text

date_input.clear()
date_input.send_keys("2023-07-25")
date_input.send_keys(Keys.ENTER)

# Wait until the old text is gone. The locator must be the table's CSS selector:
# a locator that matches no element makes until_not() succeed immediately.
# NOTE: if the new date renders exactly the same text, this wait times out.
wdwait.until_not(EC.text_to_be_present_in_element(table_locator, table_element_text))
# At least one body row must be visible before the table is read.
wdwait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#baseloadwidget_pfpde > table.mv-quote tbody tr")))
table_data = wdwait.until(EC.visibility_of_element_located(table_locator)).get_attribute("outerHTML")
# pandas 2.1+ deprecates passing literal HTML; wrap it in a file-like object.
df_list = pd.read_html(StringIO(table_data))
if df_list:
    df = df_list[0]
    print(df)
else:
    print("No data found on the table.")

Explanation:

  1. You wait for the visible widget on the page (among other, invisible ones)
  2. You get the widget text (which you expect to change for the next date)
  3. You clear the input, type the new value, and press Enter
  4. You wait until the previous text is no longer present in the widget
  5. You wait until the newly rendered table contains at least 1 row (the sign that rendering has finished)
  6. You scrape your table data
Yaroslavm
  • 1,762
  • 2
  • 7
  • 15
0

Here's how you can get the Base table data by date:

import time
import pandas as pd
from selenium.webdriver import Chrome, ChromeOptions, Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Widen pandas' console display limits before any DataFrame is printed.
for option_name, option_value in (
    ("display.max_rows", 500),
    ("display.max_columns", 500),
    ("display.width", 1000),
):
    pd.set_option(option_name, option_value)

def data_by_date(day, month, year):
    """Return the 'EEX German Power Futures' base-load table for one date.

    Opens a Chrome session on the EEX power futures page, accepts the cookie
    banner, selects 'EEX German Power Futures', types the requested date into
    the date field and reads the rendered table into a DataFrame. The browser
    is always closed before the function returns or raises.

    Args:
        day: Day of the month (int or numeric string).
        month: Month number, 1-12 (int or numeric string).
        year: Four-digit year (int or numeric string).

    Returns:
        pandas.DataFrame with one row per table row. Column names come from
        the table's header cells, minus the last one.

    Raises:
        ValueError: If day/month/year do not form a valid calendar date.
            Raised before any browser is started.
        selenium.common.exceptions.TimeoutException: If an expected element
            does not appear within the wait timeout.
    """
    from datetime import date

    # Validate and zero-pad up front (e.g. 2023-08-02), so a bad date fails
    # fast instead of being typed into the page.
    iso_date = date(int(year), int(month), int(day)).isoformat()

    options = ChromeOptions()
    options.add_argument("--start-maximized")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])

    driver = Chrome(options=options)
    try:
        wait = WebDriverWait(driver, 20)

        driver.get(url='https://www.eex.com/en/market-data/power/futures')
        wait.until(
            EC.element_to_be_clickable((By.CSS_SELECTOR, "input[value='I accept all cookies.']"))).click()
        # Fixed pause kept from the original; presumably it lets the page
        # settle after the cookie banner closes.
        time.sleep(3)
        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, "button.btn.dropdown-toggle.form.input-select div.filter-option-inner"))).click()
        wait.until(EC.element_to_be_clickable((By.XPATH, "//div[@class='dropdown-menu show']//li/a[@class='dropdown-item']/span[contains(., 'EEX German Power Futures')]"))).click()

        # Find and set the date input field to the desired date
        calendar_container = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div#symbolheader_pfpde')))
        date_input = calendar_container.find_element(By.CSS_SELECTOR, 'input.mv-input-box')
        date_input.clear()
        date_input.send_keys(iso_date)
        date_input.send_keys(Keys.ENTER)

        # NOTE(review): nothing here waits for the table to refresh after the
        # date change; confirm the rows read below belong to the new date.
        table_data = wait.until(EC.visibility_of_element_located((By.CSS_SELECTOR, "div#baseloadwidget_pfpde > table.mv-quote")))
        columns = [i.text for i in table_data.find_elements(By.CSS_SELECTOR, 'tr.mv-quote-header-row>th')]

        # The wait is scoped to the table element, so only its own rows match.
        rows = WebDriverWait(table_data, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, 'tbody>tr')))
        all_data = [
            [cell.text for cell in row.find_elements(By.CSS_SELECTOR, 'td[style^="text-align:"]')]
            for row in rows
        ]

        # The last header cell is dropped; presumably it has no matching data cell.
        return pd.DataFrame(data=all_data, columns=columns[:-1])
    finally:
        # Close the browser on success and on failure alike.
        driver.quit()

# Example run: fetch and print the table for 2 August 2023.
futures_table = data_by_date(day=2, month=8, year=2023)
print(futures_table)

output:

   Future Last Price Last Volume Settlement Price Volume Exchange Volume Trade Registration Open Interest
0  Cal-24     134.00       8,784           134.52       2,714,256                 2,643,984        72,459
1  Cal-25     124.75       8,760           124.67         604,440                   289,080        17,377
2  Cal-26     106.00       8,760           105.59          87,600                   350,400         4,072
3  Cal-27      90.25       8,760            90.23          17,520                   113,880           787
4  Cal-28          -           -            84.18               -                         -           111
5  Cal-29          -           -            82.65               -                         -            13
6  Cal-30          -           -            83.11               -                         -             7
7  Cal-31          -           -            82.93               -                         -             2
8  Cal-32          -           -            82.78               -                         -             2
9  Cal-33          -           -            81.93               -                         -             0

desired Base table

reference:

Ajeet Verma
  • 2,938
  • 3
  • 13
  • 24