Script encounters an error while parsing names and addresses out of populated results

Question

I've created a script to parse the names and addresses of the different results (56 in this case) from a website. The addresses only become visible after each result is clicked. However, when I run the following script, I get two results and then hit an "element not interactable" error (ElementNotInteractableException) pointing at the line item.click().

website link

To populate the results, enter M203DB in the search box and press the search button.

I've tried with:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys

def get_content(link):
    """Open *link*, search for 'M203DB' and print each result's name and address.

    Relies on the module-level ``driver`` created in the ``__main__`` block;
    it is not passed in as a parameter.

    NOTE: this is the version that fails. According to the report that
    accompanies this script, it prints two results and then raises
    ElementNotInteractableException at ``item.click()``.
    """
    driver.get(link)
    # Wait up to 20s for the search box to be visible, then type the query and press Enter.
    WebDriverWait(driver,20).until(EC.visibility_of_element_located((By.CSS_SELECTOR, "input.inv_addressSuggestion"))).send_keys('M203DB',Keys.RETURN)

    # presence_of_all_elements_located only requires the elements to exist in the DOM;
    # it does not guarantee that each one is visible or clickable.
    for item in WebDriverWait(driver,20).until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, "[id^='listItemContainer_']"))):
        # The reported ElementNotInteractableException is raised on this click.
        item.click()
        # The detail spans are looked up inside the clicked result container.
        name = item.find_element_by_css_selector("span.detailContactVal").text
        address = item.find_element_by_css_selector("span.detailAddr").text
        print(name,address)

# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    URL = 'https://www.sjp.co.uk/site-services/find-your-adviser?async=1'
    # `driver` is bound at module level here; get_content() reads it as a global.
    with webdriver.Chrome() as driver:
        get_content(URL)

Error the script throws:

raise exception_class(message, screen, stacktrace)
selenium.common.exceptions.ElementNotInteractableException: Message: element not interactable

How can I scrape names and addresses out of all the results?

score 1 · Accepted Answer · edited May 03 '21 at 14:30

This error indicates that the items you're trying to click are not visible in the viewport.

Check this Q/A "How to resolve ElementNotInteractableException: Element is not visible in Selenium webdriver?" for more details and solutions.

There are many challenges to this specific scraping (https://www.sjp.co.uk/site-services/find-your-adviser).

It has a responsive layout, which displays differently for mobile
It has a "Load more" button for mobile view
It has grouped sections which need to be expanded
It has multiple views, and the application renders duplicate IDs. If you check the length of the query document.querySelectorAll("[id^='listItemContainer_']").length, you'll get 112 instead of 56, which is the actual total number of results. You'll have to limit the results to the first container using this selector: #mCSB_1_container [id^='listItemContainer_'].

Here is a working Python script that handles all of these. Please read the inline comments:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
import re


def get_content(link):
    """Open *link*, search for 'M203DB' and print every result's id, name and address.

    Relies on the module-level ``driver`` (a Selenium WebDriver) created in the
    ``__main__`` block; it is not passed in as a parameter.

    Steps:
      1. Load the page and submit 'M203DB' through the search box.
      2. Click the "Load more" button if one shows up within 5 seconds.
      3. For each result inside ``#mCSB_1_container``: expand its group if
         needed, scroll it into view, click it, then print its details.

    Args:
        link: URL of the search page to open.

    Raises:
        selenium.common.exceptions.TimeoutException: if the search box or the
            result items do not appear within 20 seconds.
    """
    # Imported locally so this function does not need a new top-of-file import.
    from selenium.common.exceptions import WebDriverException

    driver.get(link)
    search_box = WebDriverWait(driver, 20).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "input.inv_addressSuggestion"))
    )
    search_box.send_keys('M203DB', Keys.RETURN)

    try:
        # If there is a "Load more" button, click it
        load_more = WebDriverWait(driver, 5).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, ".inv_loadMoreBtn"))
        )
        load_more.click()
    except WebDriverException:
        # Best effort: the button is optional. TimeoutException (no button within
        # 5s) and click failures are both WebDriverException subclasses. Unlike a
        # bare `except:`, this does not swallow KeyboardInterrupt or coding errors.
        pass

    # Select from the #mCSB_1_container element, not the whole document, to avoid duplicates
    items = WebDriverWait(driver, 20).until(
        EC.presence_of_all_elements_located(
            (By.CSS_SELECTOR, "#mCSB_1_container [id^='listItemContainer_']")
        )
    )
    for item in items:
        print("#%s\n" % (item.get_attribute("id")))

        # Get the parent element and its class.
        # find_element(By..., ...) replaces the find_element_by_* helpers, which
        # were removed in Selenium 4.3.
        parent = item.find_element(By.XPATH, '..')
        # get_attribute() returns None when the attribute is absent
        parent_class = parent.get_attribute("class") or ""

        # A parent with "inv_childData" is a grouped section that may need expanding.
        # It is treated as not yet expanded when it lacks "inv_childCollapsed".
        # NOTE(review): the class name suggests the opposite meaning; this keeps the
        # original condition, which was reported to work on the live site - confirm.
        if "inv_childData" in parent_class and "inv_childCollapsed" not in parent_class:
            # Move to the group header, then click it to expand the child items
            ActionChains(driver).move_to_element(parent).perform()
            parent.click()

        # Use JavaScript scrollIntoView() to bring the item into the viewport and
        # avoid ElementNotInteractableException
        driver.execute_script("arguments[0].scrollIntoView()", item)

        # Click the item so that its details are rendered
        item.click()

        name = item.find_element(By.CSS_SELECTOR, "span.detailContactVal").text
        address = item.find_element(By.CSS_SELECTOR, "span.detailAddr").text

        print("Name: '%s'\nAddress:\n%s\n" % (name, address))

# Script entry point: runs only when the file is executed directly.
if __name__ == '__main__':
    URL = 'https://www.sjp.co.uk/site-services/find-your-adviser?async=1'

    chrome_options = Options()

    # Optional: uncomment to run Chrome headless
    # chrome_options.add_argument("--headless")

    # Optional: uncomment to set the window size to 1920x1080 and avoid the responsive mobile view
    # chrome_options.add_argument("--window-size=1920,1080")

    # `driver` is bound at module level here; get_content() reads it as a global.
    with webdriver.Chrome(options=chrome_options) as driver:
        get_content(URL)

Did you get a chance to take a look at [this post](https://stackoverflow.com/questions/60838550/unable-to-upload-a-pdf-file-using-send-keys-or-requests), @Christos Lytras? I'd be glad if you could spare a few moments to help solve the issue. Thanks. — robots.txt, Mar 31 '20 at 10:23

Script encounters an error while parsing names and addresses out of populated results

1 Answer