1

Currently trying to retrieve data from this url.

I am trying to retrieve the following fields: Unit Name, Site Street 1, Site Street 2, Site City, Province/State, Postal Code, Facility Category, and Inspections Completed. I can retrieve the data, but each row's values come back as a single line of text. For example:

@ The Trax Public House 304 Westgrove Drive Spruce Grove AB T7X 4P9 Food - General 1

My expected output is each attribute collected separately, e.g. Unit Name: @ The Trax Public House, Site Street 1: 304 Westgrove Drive, etc.

Here is my code

# Record when the run began.
start_time = time.time()

# Launch Chrome with the driver binary returned by ChromeDriverManager().install()
# and let element lookups retry for up to 5 seconds.
driver = webdriver.Chrome(ChromeDriverManager().install())
driver.implicitly_wait(5)

URL = 'https://ephisahs.microsoftcrmportals.com/disclaimer/restaurantinspections/edmonton-facilities/'
driver.get(URL)

restaurant_names = []

# Every <tr> carrying a data-name attribute is printed through .text, which
# yields the whole row as a single string rather than separate fields.
for row in driver.find_elements_by_xpath("//tr[@data-name]"):
    print(row.text)

time.sleep(5)
driver.close()

Also: I want to loop through all the available pages, but I notice that the URL doesn't seem to contain a page number. How can I loop through all the pages on this website?

vitaliis
  • 4,082
  • 5
  • 18
  • 40
mathgeek
  • 125
  • 7

1 Answer

1

You should wait for and locate not only the row elements, but also their child cells.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


def _cell(row, xpath):
    """Return the <td> inside *row* matched by the row-relative *xpath*."""
    return row.find_element(By.XPATH, xpath)


driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
facilities = []
try:
    driver.implicitly_wait(10)
    driver.get('https://ephisahs.microsoftcrmportals.com/disclaimer/restaurantinspections/edmonton-facilities/')

    # Block (up to 30 s) until the table rows are rendered and visible. The
    # condition returns the located rows, so no second lookup is needed.
    wait = WebDriverWait(driver, 30)
    cards = wait.until(EC.visibility_of_all_elements_located((By.XPATH, "//tr[@data-name]")))

    for card in cards:
        # The leading "." keeps every XPath relative to the current row;
        # without it the search would start from the document root.
        name = _cell(card, ".//td[@data-attribute='name']").get_attribute("data-value")
        street1 = _cell(card, ".//td[@data-attribute='address1_line1']").get_attribute("data-value")
        street2 = _cell(card, ".//td[@data-th='Site Street 2']").text
        site_city = _cell(card, ".//td[@data-attribute='address1_city']").text
        province = _cell(card, ".//td[@data-attribute='address1_stateorprovince']").text
        postal_code = _cell(card, ".//td[@data-th='Site Postal Code/Zip Code']").text
        facility_category = _cell(card, ".//td[@data-attribute='fs_facilitycategory']").text
        inspections = _cell(card, ".//td[@data-th='Inspections Completed']").text
        facilities.append([name, street1, street2, site_city, province, postal_code, facility_category, inspections])
finally:
    # quit() ends the whole WebDriver session, so a separate close() is not
    # needed; running it in `finally` releases the browser even when a
    # locator above raises.
    driver.quit()

# One facility (a list of its eight fields) per output line.
for p in facilities:
    print(p)

My output:

['@ The Trax Public House', '304 Westgrove Drive', '', 'Spruce Grove', 'AB', 'T7X 4P9', 'Food - General', '1']
['1000 Women Child Care Centre - Kitchen', '10215 108 Street NW', '', 'Edmonton', 'AB', 'T5J 1L6', 'Food - General', '1']
['101 Bar & Grill', '7317 101 Avenue NW', '', 'Edmonton', 'AB', 'T6A 0H9', 'Food - General', '8']
['10213 75 Street NW', '10213 75 St NW', '', 'Edmonton', 'Alberta', 'T6A 2Z3', 'Food - General', '0']
['104 Street Public Market', '104 Street between Jasper Avenue and 102 Avenue', '', 'Edmonton', 'Alberta', '', 'Food - General', '0']
['107 Convenience Store', '10118 107 Avenue', '', 'Edmonton', 'AB', 'T5H 0V5', 'Food - General', '2']
['110 Glencoe Boulevard', '110 Glencoe Blvd', '', 'Sherwood Park', 'AB', 'T8A 5J5', 'Food - General', '1']
['11416 43 Avenue NW - Foster Kitchen Gudlay', '11416 43 Ave NW', '', 'Edmonton', 'Alberta', 'T6J 0Y1', 'Food - Un-permitted', '2']
['115th Avenue Super Market', '9110 115 Avenue NW', '', 'Edmonton', 'AB', 'T5B 0M1', 'Food - General', '1']
['1174 Hainstock Green SW', '1174 Hainstock Grn SW', '', 'Edmonton', 'Alberta', 'T6W 3B6', 'Food - Un-permitted', '1']
['118 Ave Super Food Store', '3637 118 Avenue NW', '', 'Edmonton', 'AB', 'T5W 0Z3', 'Food - General', '0']
['12 Mile Wolf Catering', '7 St Vital Avenue', '', 'St. Albert', 'Alberta', 'T8N 1K1', 'Food - General', '1']
['12 Mile Wolf Catering - Links - Event', '7 St Vital Avenue', '', 'St. Albert', 'Alberta', 'T8N 1K1', 'Food - Special Event', '6']
['12 Mile Wolf Catering - Mac & Cheese - Event', '7 St Vital Avenue', '', 'St. Albert', 'Alberta', 'T8N 1K1', 'Food - Special Event', '3']
['1209 McAllister Way SW', '1209 McAllister Way SW', '', 'Edmonton', 'Alberta', 'T6W 1X8', 'Food - Un-permitted', '1']
['123 Grow - Kitchen', '9900 93 Street', '', 'Fort Saskatchewan', 'AB', 'T8L 4K8', 'Food - General', '1']
['124 Grand Market', '10800 124 St NW', '', 'Edmonton', 'Alberta', 'T5N', "Food - Farmers' Market Coordinator", '0']
['151 22559 Wye Road', '151 22559 Wye Road', '', 'Sherwood Park', 'Alberta', 'T8B 1J7', 'Food - Community Organization Function', '1']
['16 Greer Crescent', '16 Greer Crescent', '', 'St. Albert', 'Alberta', 'T8N 1T8', 'Food - General', '1']
['16325 55A Street NW', '16325 55A Street NW', '', 'Edmonton', 'AB', 'T5Y 3L2', 'Food - General', '0']
['19th Hole at Goose Hummock', '23210 Township Road 564', '', 'Gibbons', 'Alberta', 'T0A1N4', 'Food - General', '2']
['1st Rnd', '11248 104 Avenue NW', '', 'Edmonton', 'AB', 'T5K 2X4', 'Food - General', '4']
['1st Round - Kitchen', '1672 West Edmonton Mall NW', '', 'Edmonton', 'AB', 'T5T 4M2', 'Food - General', '5']
['2 Cha', '8130 Gateway Boulevard NW', '', 'Edmonton', 'Alberta', 'T6E 4B1', 'Food - General', '6']
['2 for 1 Pizza and Pasta - Spruce Grove', '23 - 100 King Street', '', 'Spruce Grove', 'AB', 'T7X 4J7', 'Food - General', '2']
['2 For 1 Pizza House - Restaurant', '3747 8 Avenue SW', '', 'Edmonton', 'Alberta', 'T6X 2G6', 'Food - General', '1']
['2 Girls And a Grill', '16703 66 Street NW', '', 'Edmonton', 'AB', 'T5E 5S7', 'Food - General', '1']
['2 Ladies in a Truck', '5707 72A Ave NW', '', 'Edmonton', 'Alberta', 'T6B 2J1', 'Food - General', '1']
['2177412 Alberta Ltd. - Water Hauler - EDM-877', '6231 41 St', '', 'Leduc', 'Alberta', 'T9E 0V7', 'Food - Water Hauler', '2']
['22240 118A Avenue NW - Kitchen', '22240 118A Ave NW', '', 'Edmonton', 'Alberta', 'T5S 2B9', 'Food - Un-permitted', '1']
['2246201 Alberta Ltd.', '18 Fairway Drive', '', 'Stony Plain', 'AB', 'T7Z 1M3', 'Food - General', '0']
['2285 Austin Way SW', '2285 Austin Way SW', '', 'Edmonton', 'Alberta', 'T6W 0L5', 'Food - Un-permitted', '1']
['2307185 Alberta Inc', '12845 - 140 Avenue', '', 'Edmonton', 'AB', 'T6V 1P!', 'Food - General', '1']
['24/7 Convenience Store - Convenience Store', '1903 105 St NW', '', 'Edmonton', 'AB', 'T6J 5V9', 'Food - General', '8']
['2423 28B Avenue NW - Commercial Kitchen', '2423 28B Avenue NW', '', 'Edmonton', 'Alberta', 'T6T 2A3', 'Food - General', '0']
['2Dhagax Convenience Store', '9420 118 Avenue NW', '', 'Edmonton', 'AB', 'T5G 0N6', 'Food - General', '1']
['3039 Township Road 485', '3039 Township Road 485', '', 'Leduc County', 'Alberta', 'T0C', 'Food - General', '1']
['3812 - 20 Avenue - Foster Home - Arongo - Kitchen', '30 - 3812 20 Avenue', '', 'Edmonton', 'Alberta', '', 'Food - Un-permitted', '1']
['3J Variety Store - Convenience Store', '10821 23 Avenue NW', '', 'Edmonton', 'AB', 'T6J 7B5', 'Food - General', '2']
['4 Wise Monkeys Catering - Restaurant', '1739 Towne Centre Boulevard NW', '', 'Edmonton', 'AB', 'T6R 0T4', 'Food - General', '2']
['40 Acres - Restaurant', '13315 Buena Vista Rd NW', '', 'Edmonton', 'Alberta', 'T5J 2R7', 'Food - General', '5']
['420 Premium Market', '200 - 10 McKenney Avenue', '', 'St. Albert', 'AB', 'T8N 5S8', 'Food - Un-permitted', '0']
['4460 Prowse Road SW', '4460 Prowse Road SW', '', 'Edmonton', 'Alberta', 'T6W 3A5', 'Food - Un-permitted', '0']
['50 St. Convenience Store', '5010 76 Avenue NW', '', 'Edmonton', 'AB', 'T6B 0A6', 'Food - General', '2']
['50 Street Petro Pass', '8025 50 Street NW', '', 'Edmonton', 'AB', 'T6B 1E5', 'Food - General', '1']
['5-1-5-0 Smokehouse & Catering', '9 - 54500 Range Road 275', '', 'Sturgeon County', 'Alberta', 'T8R 1Z1', 'Food - General', '2']
['561 Victoria Court Brodhus Bakery', '561 Victoria Court', '', 'Sherwood Park', 'AB', 'T8A 4L2', 'Food - General', '0']
['5th Floor Servery', '1A -  Sir Winston Churchill Square, Court Of Appeal, Edmonton Law Courts NW', '', 'Edmonton', 'AB', 'T5J 0R2', 'Food - General', '1']
['6112 175 Avenue NW - Commercial Kitchen', '6112 175 Avenue NW', '', 'Edmonton', 'AB', 'T5Y 0T4', 'Food - General', '0']
["6A's And J Convenience Store", '9937-63 Avenue', '', 'Edmonton', 'Alberta', 'T6E 6C9', 'Food - General', '1']

Update for multi-page:

I tried going through a few pages and found that some of the locators were not found on the 2nd and 3rd pages. So I'm updating the code and adding a multi-page approach.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Number of result pages to scrape, starting from the first one.
MAX_PAGES = 9

ROW_XPATH = "//tr[@data-name]"
NEXT_PAGE_CSS = "a[aria-label='Next page']"

# Row-relative XPath of each column's cell, in output order. Every expression
# starts with "./" so the lookup stays inside the current row.
CELL_XPATHS = [
    ".//td[@data-th='Unit Name']",
    ".//td[@data-th='Site Street 1']",
    ".//td[@data-th='Site Street 2']",
    ".//td[@data-th='Site City']",
    ".//td[@data-th='Site Province/State']",
    ".//td[@data-th='Site Postal Code/Zip Code']",
    # NOTE(review): the header text 'Facility Category' is assumed from the
    # column list in the question; the data-attribute alternative is the
    # locator the single-page script used. Confirm against the live page.
    ".//td[@data-attribute='fs_facilitycategory' or @data-th='Facility Category']",
    ".//td[@data-th='Inspections Completed']",
]

driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
facilities = []
try:
    driver.implicitly_wait(15)
    driver.get('https://ephisahs.microsoftcrmportals.com/disclaimer/restaurantinspections/edmonton-facilities/')
    wait = WebDriverWait(driver, 30)

    for page in range(1, MAX_PAGES + 1):
        # The condition returns the visible rows, so they are not looked up twice.
        cards = wait.until(EC.visibility_of_all_elements_located((By.XPATH, ROW_XPATH)))

        for card in cards:
            facilities.append([card.find_element(By.XPATH, xpath).text for xpath in CELL_XPATHS])

        if page == MAX_PAGES:
            # Last requested page is collected; no further navigation needed.
            break

        wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, NEXT_PAGE_CSS))).click()
        # The URL does not change between pages, so wait until a row from the
        # page just scraped is detached from the DOM; otherwise the next
        # iteration could read the old rows again.
        # NOTE(review): assumes the grid replaces its <tr> elements on paging.
        wait.until(EC.staleness_of(cards[0]))
        wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, ".view-grid.has-pagination")))
        print(f"clicked page {page + 1}")
finally:
    # quit() ends the whole WebDriver session, so a separate close() is not
    # needed; running it in `finally` releases the browser even on errors.
    driver.quit()

# One facility (a list of its eight fields) per output line.
for p in facilities:
    print(p)
vitaliis
  • 4,082
  • 5
  • 18
  • 40
  • Thank you, this is excellent. Also do you by any chance know how loop through all the pages? I've never worked on looping where the URL doesn't consist the page number – mathgeek May 02 '21 at 19:33
  • You'll need to click next button. Locator for it `a[aria-label='Next page']` – vitaliis May 02 '21 at 19:40
  • Here is one approach for multiple pages https://stackoverflow.com/questions/67186803/how-can-i-append-values-from-more-than-one-page-using-selenium – vitaliis May 02 '21 at 20:17
  • 1
    @mathgeek I've tried to go through few pages and found out that some of the locators were absent on few following pages. Enjoy. You can upvote me second time :) – vitaliis May 03 '21 at 01:42