Good time of the day!
While working on a scraping project, I have faced some issues. Currently I am working on a draft:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
import requests
from bs4 import BeautifulSoup
import time
import random
#driver Path
PATH = "C:\Program Files (x86)\chromedriver"
BASE_URL = "https://www.immoweb.be/en/classified/house/for-sale/ottignies/1340/9308167"
driver = webdriver.Chrome(PATH)
driver.implicitly_wait(30)
driver.get(BASE_URL)
time.sleep(random.uniform(3.0, 5.0))
btn = driver.find_elements_by_xpath('//*[@id="uc-btn-accept-banner"]')[0]
btn.click()
r = requests.get(BASE_URL)
soup = BeautifulSoup(r.content, "html.parser")
def reader(url):
ls = list()
ImmoWebCode = url.find(class_ ="classified__information--immoweb-code").text.strip()
Price = url.find("p", class_="classified__price").find("span",class_="sr-only").text.strip()
Locality = url.find(class_="classified__information--address-row").find("span").text.strip()
HouseType = url.find(class_="classified__title").text.strip()
LivingArea = url.find("th",text="Living area").find_next(class_="classified-table__data").next_element.strip()
RoomsNumber = url.find("th",text="Bedrooms").find_next(class_="classified-table__data").next_element.strip()
Kitchen = url.find("th",text="Kitchen type").find_next(class_="classified-table__data").next_element.strip()
TerraceOrientation = url.find("th",text="Terrace orientation").find_next(class_="classified-table__data").next_element.strip()
TerraceArea = url.find("th",text="Terrace").find_next(class_="classified-table__data").next_element.strip()
Furnished = url.find("th",text="Furnished").find_next(class_="classified-table__data").next_element.strip()
ls.append(Furnished)
OpenFire = url.find("th", text="How many fireplaces?").find_next(class_="classified-table__data").next_element.strip()
GardenOrientation = url.find("th", text="Garden orientation").find_next(class_="classified-table__data").next_element.strip()
ls.append(GardenOrientation)
GardenArea = url.find("th",text="Garden surface").find_next(class_="classified-table__data").next_element.strip()
PlotSurface = url.find("th",text="Surface of the plot").find_next(class_="classified-table__data").next_element.strip()
ls.append(PlotSurface)
FacadeNumber = url.find("th",text="Number of frontages").find_next(class_="classified-table__data").next_element.strip()
SwimmingPoool = url.find("th",text="Swimming pool").find_next(class_="classified-table__data").next_element.strip()
StateOfTheBuilding = url.find("th",text="Building condition").find_next(class_="classified-table__data").next_element.strip()
return ls
print(reader(soup))
I start facing issues, when the code reaches "Locality", I receive an Exception has occurred: AttributeError 'NoneType' object has no attribute 'find
, though it is clear that the mentioned element is present on HTML code. I am adamant, that it is a synthax issue, but I can not put a finger on it.
It brings me to my second question:
Since this code will be running on multiple pages, those pages might not have requested elements. How can I place the None
value if it occurs.
Thank you very much in advance!
Source Code:
<div class="classified__header-secondary-info classified__informations"><p class="classified__information--property">
3 bedrooms
<span aria-hidden="true">|</span>
199
<span class="abbreviation"><span aria-hidden="true">
m² </span> <span class="sr-only">
square meters </span></span></p> <div class="classified__information--financial"><!----> <a href="https://www.immoweb.be/en/credit-application?classified=9308167&icid_to=mortgage&icid_cta=classified-header" class="classified__information--mortgage-banner small"><!----> <span class="mortgage-banner__text">Request your mortgage loan</span></a></div> <div class="classified__information--address"><p><span class="classified__information--address-row"><span>
1340
</span> <span aria-hidden="true">—</span> <span>
Ottignies
</span>
|
</span> <button class="button button--text button--size-small classified__information--address-button">
Ask for the exact address
</button></p></div> <div class="classified__information--immoweb-code">
Immoweb code : 9308167
</div></div>