
(screenshot of the spiti24.gr property listing page)

I want to extract all the prices from the above site, yet I get no results back. To avoid the captcha I run the browser headless with a randomized user-agent header.

    self.driver.find_elements(By.CLASS_NAME, "property__price")

returns nothing


This is my whole code:

    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.action_chains import ActionChains
    from fake_useragent import UserAgent

    class PropertyScraper:
        def __init__(self, base_url, location):
            # Randomise the user agent and run headless to reduce the chance of a captcha
            options = Options()
            ua = UserAgent()
            userAgent = ua.random
            options.add_argument(f'user-agent={userAgent}')
            options.add_argument("--window-size=1920,1080")
            options.add_argument("--start-maximized")
            options.add_argument("--headless")
            options.add_argument("--disable-gpu")
            options.add_argument("--no-sandbox")
            options.add_argument("--disable-dev-shm-usage")
            self.driver = webdriver.Chrome(options=options)
            self.driver.implicitly_wait(20)
            self.driver.get(f"{base_url}{location}")

        def scrape_property_data(self, pages_to_scrape):
            all_data = []
            for _ in range(pages_to_scrape):
                # Collect the price and size text from the current results page
                property_prices = self.driver.find_elements(By.CLASS_NAME, "property__price")
                property_square_meters = self.driver.find_elements(By.CLASS_NAME, "property__title__parts")
                for price_element, square_meter_element in zip(property_prices, property_square_meters):
                    price = price_element.text.strip()
                    square_meter = square_meter_element.text.strip()
                    all_data.append({"price": price, "square_meter": square_meter})
                # time.sleep(random.uniform(5, 10))  # Random delay between 5 to 10 seconds
                next_button = self.driver.find_element(By.CSS_SELECTOR, "li.next.enabled")
                actions = ActionChains(self.driver)
                actions.move_to_element(next_button).perform()  # Move to the "next" button
                next_button.click()
            return all_data

        def close_driver(self):
            self.driver.quit()

    def main():
        base_url = "https://www.spiti24.gr/en/for-sale/property/"
        location = "glyfada"
        pages_to_scrape = 5
        scraper = PropertyScraper(base_url, location)
        scraped_data = scraper.scrape_property_data(pages_to_scrape)
        scraper.close_driver()
        print(scraped_data)

    if __name__ == "__main__":
        main()
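
For debugging, here is a minimal sketch of a helper (the dump_page name is mine, not part of the script above) that saves what the headless session actually received right after the page load, so you can check whether the listings or a captcha/consent page came back:

    # Hypothetical debugging helper: inspect what the headless browser was served
    def dump_page(driver, prefix="debug"):
        with open(f"{prefix}.html", "w", encoding="utf-8") as f:
            f.write(driver.page_source)          # rendered HTML as the browser sees it
        driver.save_screenshot(f"{prefix}.png")  # visual snapshot of the headless viewport

    # e.g. call dump_page(self.driver) right after self.driver.get(...) in __init__
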
  • Please [edit the question](/posts/76795060/edit) to limit it to a specific problem with enough detail to identify an adequate answer. Avoid asking multiple distinct questions at once. See the [How to Ask](https://stackoverflow.com/help/how-to-ask) page for help clarifying this question. – undetected Selenium Jul 29 '23 at 19:27
  • Hi and thank you. I edited my question to limit it to a single specific problem. Thank you for the advice – asd Jul 29 '23 at 19:36

1 Answer


To extract and print the text contents, ideally you need to induce WebDriverWait for visibility_of_all_elements_located(), and you can use either of the following locator strategies (a consolidated sketch follows the list):

  • Using CSS_SELECTOR and get_attribute("innerHTML"):

    print([my_elem.get_attribute("innerHTML") for my_elem in WebDriverWait(self.driver, 20).until(EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "h3.property__title span.property__price")))])
    
  • Using XPATH and text attribute:

    print([my_elem.text for my_elem in WebDriverWait(self.driver, 20).until(EC.visibility_of_all_elements_located((By.XPATH, "//h3[@class='property__title']//span[@class='property__price']")))])
    
  • Note: You have to add the following imports:

    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.common.by import By
    from selenium.webdriver.support import expected_conditions as EC
    

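Putting it together, a minimal sketch of how the explicit wait could replace the bare find_elements calls in your scrape_property_data method (the locators and the 20-second timeout are assumptions carried over from the code in the question, not verified against the live site):

    from selenium.webdriver.common.by import By
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    # Sketch of a drop-in body for PropertyScraper.scrape_property_data
    def scrape_property_data(self, pages_to_scrape):
        all_data = []
        wait = WebDriverWait(self.driver, 20)  # explicit wait instead of relying on implicitly_wait
        for _ in range(pages_to_scrape):
            # Block until the price elements are actually rendered before reading them
            property_prices = wait.until(EC.visibility_of_all_elements_located(
                (By.CSS_SELECTOR, "h3.property__title span.property__price")))
            property_square_meters = wait.until(EC.visibility_of_all_elements_located(
                (By.CLASS_NAME, "property__title__parts")))
            for price_element, square_meter_element in zip(property_prices, property_square_meters):
                all_data.append({"price": price_element.text.strip(),
                                 "square_meter": square_meter_element.text.strip()})
            next_button = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, "li.next.enabled")))
            next_button.click()
        return all_data

If the headless session is still being served a captcha or consent page, these waits will time out with a TimeoutException instead of silently returning an empty list, which makes the failure easier to diagnose.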

undetected Selenium