Feed dataframe with webscraping

Question

I'm trying to append some scraped values to a dataframe. I have this code:

import time
import requests
import pandas

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
import json

# Grab content from URL
# Search-results page; the query string carries a percent-encoded, JSON-like search state.
url = "https://www.remax.pt/comprar?searchQueryState={%22regionName%22:%22%22,%22businessType%22:1,%22listingClass%22:1,%22page%22:1,%22sort%22:{%22fieldToSort%22:%22ContractDate%22,%22order%22:1},%22mapIsOpen%22:false,%22listingTypes%22:[],%22prn%22:%22%22}"

# NOTE(review): non-raw string containing backslashes. "\D" and "\c" are not
# recognised escape sequences, so the path survives, but r'...' would be safer.
PATH = 'C:\DRIVERS\chromedriver.exe'
driver = webdriver.Chrome(PATH)
# NOTE(review): `option` comes from the Firefox Options class and is never
# passed to the Chrome driver created above.
option = Options()
option.headless = False
#chromedriver = 
#driver = webdriver.Chrome(chromedriver)
#driver = webdriver.Firefox() #(options=option)
#driver.get(url)
#driver.implicitly_wait(10)  # in seconds

time.sleep(1)
# NOTE(review): `wait` is created but never used in this script, so nothing
# waits for the listings to be present before the lookup below.
wait = WebDriverWait(driver, 10)
driver.get(url)

# One element per direct child <div> of the results-list container.
rows = driver.find_elements_by_xpath("//div[@class='row results-list ']/div")
data=[]
for row in rows:
    # Each XPath starts with "." so it is evaluated relative to the current row.
    # find_element_by_xpath raises NoSuchElementException when nothing matches.
    price=row.find_element_by_xpath(".//p[@class='listing-price']").text
    print(price)
    address=row.find_element_by_xpath(".//p[@class='listing-address']").text
    print(address)
    Tipo=row.find_element_by_xpath(".//p[@class='listing-type']").text
    print(Tipo)
    Area=row.find_element_by_xpath(".//p[@class='listing-area']").text
    print(Area)
    Quartos=row.find_element_by_xpath(".//p[@class='icon-bedroom-full']").text
    print(Quartos)
    # NOTE(review): list.append() accepts exactly one argument; passing five
    # separate lists raises TypeError once this line is reached.
    data.append([price],[address],[Tipo],[Area],[Quartos])



#driver.quit()

The problem is that it returns the following error:

NoSuchElementException                    Traceback (most recent call last)
<ipython-input-16-9e4d01985cda> in <module>
     49     price=row.find_element_by_xpath(".//p[@class='listing-price']").text
     50     print(price)
---> 51     address=row.find_element_by_xpath(".//p[@class='listing-address']").text
     52     print(address)
     53     Tipo=row.find_element_by_xpath(".//p[@class='listing-type']").text

~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in find_element_by_xpath(self, xpath)
    349             element = element.find_element_by_xpath('//div/td[1]')
    350         """
--> 351         return self.find_element(by=By.XPATH, value=xpath)
    352 
    353     def find_elements_by_xpath(self, xpath):

~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in find_element(self, by, value)
    656                 value = '[name="%s"]' % value
    657 
--> 658         return self._execute(Command.FIND_CHILD_ELEMENT,
    659                              {"using": by, "value": value})['value']
    660 

~\anaconda3\lib\site-packages\selenium\webdriver\remote\webelement.py in _execute(self, command, params)
    631             params = {}
    632         params['id'] = self._id
--> 633         return self._parent.execute(command, params)
    634 
    635     def find_element(self, by=By.ID, value=None):

~\anaconda3\lib\site-packages\selenium\webdriver\remote\webdriver.py in execute(self, driver_command, params)
    319         response = self.command_executor.execute(driver_command, params)
    320         if response:
--> 321             self.error_handler.check_response(response)
    322             response['value'] = self._unwrap_value(
    323                 response.get('value', None))

~\anaconda3\lib\site-packages\selenium\webdriver\remote\errorhandler.py in check_response(self, response)
    240                 alert_text = value['alert'].get('text')
    241             raise exception_class(message, screen, stacktrace, alert_text)
--> 242         raise exception_class(message, screen, stacktrace)
    243 
    244     def _value_or_default(self, obj, key, default):

NoSuchElementException: Message: no such element: Unable to locate element: {"method":"xpath","selector":".//p[@class='listing-address']"}
  (Session info: chrome=90.0.4430.72)

But when I try with only the first element, it returns a list of prices. What is the difference, if I'm giving it the different places in the dataframe and I use the same type of path?

Please post the HTML of the page under test to diagnose this error. — C. Peck, Apr 19 '21 at 17:00

vitaliis · Accepted Answer · 2021-04-20T00:19:11.113

The main problem is your locators. 1. First, compare the locators I use with the ones in your code. 2. Second, add explicit waits (`from selenium.webdriver.support import expected_conditions as EC`). 3. Third, remove unnecessary code.

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Scrape every listing card on the results page into `data`: a list of
# [price, address, type, area, bedrooms] rows, each field being the element text.
# The chromedriver path is machine-specific; adjust it for your system.
driver = webdriver.Chrome(executable_path='/snap/bin/chromium.chromedriver')
url = "https://www.remax.pt/comprar?searchQueryState={%22regionName%22:%22%22,%22businessType%22:1,%22listingClass%22:1,%22page%22:1,%22sort%22:{%22fieldToSort%22:%22ContractDate%22,%22order%22:1},%22mapIsOpen%22:false,%22listingTypes%22:[],%22prn%22:%22%22}"
data = []
try:
    driver.get(url)
    # Block (up to 10 s) until the listing cards are present in the DOM.
    # until() returns the located elements, so no second lookup is needed;
    # it raises TimeoutException if they never appear.
    wait = WebDriverWait(driver, 10)
    rows = wait.until(
        EC.presence_of_all_elements_located(
            (By.XPATH, "//div[@class='row results-list ']/div")
        )
    )
    for row in rows:
        # The leading "." keeps each XPath relative to the current card;
        # without it every lookup would match from the document root.
        price = row.find_element(By.XPATH, ".//p[@class='listing-price']").text
        address = row.find_element(By.XPATH, ".//h2[@class='listing-address']").text
        # Named listing_type so the builtin `type` is not shadowed.
        listing_type = row.find_element(By.XPATH, ".//li[@class='listing-type']").text
        area = row.find_element(By.XPATH, ".//li[@class='listing-area']").text
        quartos = row.find_element(By.XPATH, ".//li[@class='listing-bedroom']").text
        data.append([price, address, listing_type, area, quartos])
finally:
    # quit() closes all windows and ends the driver session, even when a
    # lookup above raised; a separate close() call is unnecessary.
    driver.quit()

Please note that I did it on Linux, so your Chrome driver location will be different. Also, to print the list use:

# Each entry of `data` is a plain list of strings (not a WebElement),
# so print the row itself rather than a `.text` attribute.
for listing in data:
    print(listing)

You can modify it as you like. I received the following output:

['240 000 €', 'Lisboa -  Lisboa, Carnide', 'Apartamento', '54 m\n2', '1']
['280 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '80 m\n2', '1']
['285 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '83 m\n2', '1']
['290 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '85 m\n2', '1']
['280 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '80 m\n2', '1']
['290 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '85 m\n2', '1']
['285 000 €', 'Lisboa -  Lisboa, Beato', 'Apartamento', '83 m\n2', '1']
['80 000 €', 'Santarém -  Cartaxo, Ereira e Lapa', 'Terreno', '12440 m\n2', '1']
['260 000 €', 'Lisboa -  Sintra, Queluz e Belas', 'Prédio', '454 m\n2', '1']
['37 500 €', 'Santarém -  Torres Novas, Torres Novas (Santa Maria, Salvador e Santiago)', 'Prédio', '92 m\n2', '1']
['505 000 €', 'Lisboa -  Sintra, Algueirão-Mem Martins', 'Duplex', '357 m\n2', '1']
['135 700 €', 'Lisboa -  Mafra, Milharado', 'Terreno', '310 m\n2', '1']
['132 800 €', 'Lisboa -  Mafra, Milharado', 'Terreno', '310 m\n2', '1']
['133 440 €', 'Lisboa -  Mafra, Milharado', 'Terreno', '310 m\n2', '1']
['179 000 €', 'Lisboa -  Mafra, Milharado', 'Terreno', '310 m\n2', '1']
['75 000 €', 'Lisboa -  Vila Franca de Xira, Vila Franca de Xira', 'Apartamento', '52 m\n2', '1']
['575 000 €', 'Porto -  Matosinhos, Matosinhos e Leça da Palmeira', 'Apartamento', '140 m\n2', '1']
['35 000 €', 'Setúbal -  Almada, Caparica e Trafaria', 'Outros - Habitação', '93 m\n2', '1']
['550 000 €', 'Leiria -  Alcobaça, Évora de Alcobaça', 'Moradia', '160 m\n2', '1']
['550 000 €', 'Lisboa -  Loures, Santa Iria de Azoia, São João da Talha e Bobadela', 'Moradia', '476 m\n2', '1']

data.append([price,address,type,area,quartos]) You could also append it into a list like so. — Arundeep Chohan, Apr 19 '21 at 18:36
Xpathing from an element needs a . otherwise you'll always retrieve from the first row. — Arundeep Chohan, Apr 19 '21 at 19:49
I thought it worked but I was wrong because, later, when I evaluate "data", the result gives me something like this: [, And I was expecting a dataframe with all the corresponding fields. — jps17183, Apr 19 '21 at 20:59
data is basically a list of elements. How you are going to modify it is a different question. — vitaliis, Apr 19 '21 at 21:09
All that is left is to convert the list to a pandas dataframe. Stack Overflow already has many answered questions on how to accomplish this. I also updated one locator, the last one; it was not correct before. — vitaliis, Apr 19 '21 at 23:31
@ArundeepChohan You were right. `.` is a mandatory part of the locator in this case. — vitaliis, Apr 19 '21 at 23:32

Feed dataframe with webscraping

1 Answers1

Linked