I am attempting to webscrape data that is hidden within a #shadow-root (open). I've managed to make progress, however, I am getting stuck at the end and was wondering if someone could help me finish it out.
Code:
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import Select
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import time as t
import pandas as pd
options = webdriver.ChromeOptions()
options.add_argument("--no-sandbox")
options.add_argument('disable-notifications')
options.add_argument("start-maximized")
options.add_experimental_option("detach", True)
browser = webdriver.Chrome(service=Service(ChromeDriverManager().install()),options=options)
actions = ActionChains(browser)
url = "https://iltacon2022.expofp.com/?aceds-association-of-certified-e-discovery-specialists"
browser.get(url)
exhibitor_el = WebDriverWait(browser, 10).until(EC.presence_of_element_located((By.XPATH, '//div[@data-event-id="iltacon2022"]/div')))
exhibitor_el_shadow_root = exhibitor_el.shadow_root
t.sleep(5)
companies_divv = exhibitor_el_shadow_root.find_element(By.CSS_SELECTOR, 'div[class="overlay-content exhibitor"]')
try:
name = exhibitor_el_shadow_root.find_elements(By.CSS_SELECTOR, "div[class = 'exhibitor__bar']").text
except AttributeError:
name = "Couldn't Find"
try:
booth = exhibitor_el_shadow_root.find_elements(By.CSS_SELECTOR, "a[class = 'exhibitor__categories-booth']").text
except AttributeError:
booth = "Couldn't Find"
try:
url = exhibitor_el_shadow_root.find_elements(By.CSS_SELECTOR, "div[class = 'exhibitor__meta']").get_attribute('href')
except AttributeError:
url = "Couldn't Find"
print(name)
print(booth)
print(url)
The output I am getting is "Couldn't Find", but I think that is just because I am misusing the CSS Selector or failed to get into the #shadow-root.
Desired output:
Name: ACEDS - Association of Certified E-Discovery Specialists
Booth: 828
Url: https://www.aceds.org/