1

I am scraping a website, but after about 50 iterations of my loop it requires me to enter a CAPTCHA or says that I didn't enable JavaScript in my browser. I am using Selenium and BeautifulSoup, by the way. Does anyone know how to solve this? Is there a way to evade the CAPTCHA? I can scrape only about a thousand records, because the browser can't get the specific element once the CAPTCHA elements appear. My Python code:

from selenium import webdriver
import urllib
import urllib.request
import string
from bs4 import BeautifulSoup
import mysql.connector
import time
from selenium.webdriver.chrome.options import Options

# Chrome launch configuration: each entry below is passed to the browser as a
# command-line argument (the second one overrides the reported user agent).
options = Options()
for chrome_flag in (
    "--enable-javascript",
    "user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
):
    options.add_argument(chrome_flag)

# Location of the chromedriver executable used to start the browser.
chrome_path = r"C:\chromedriver.exe"

# Start the Chrome session that the rest of the script drives.
driver = webdriver.Chrome(chrome_path, options=options)
# Total number of result pages to walk through.
pageNumber = 2324

# Settings for the MySQL connection on localhost that receives the rows.
db_settings = {
    "host": "localhost",
    "user": "root",
    "passwd": "",
    "database": 'property_guru',
}
mydb = mysql.connector.connect(**db_settings)

# Cursor used for all statements executed against the connection.
mycursor = mydb.cursor()

# Walk the paginated search results, extract the fields of every listing card
# and insert one row per listing into `guru_listings`.
#
# Selenium helpers for the explicit wait below; imported here so this section
# brings its own dependencies into scope.
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Parameterized insert; built once instead of once per row.
INSERT_SQL = """INSERT INTO guru_listings (property_name,property_address,property_price,listed_by,contact) VALUES (%s,%s,%s,%s,%s)"""
# Maximum time to wait for the listing cards to show up on a page.
CARD_WAIT_SECONDS = 10

try:
    # NOTE(review): numbering starts at 0 as in the original script; confirm
    # whether the site's first results page is 0 or 1.
    for i in range(0, pageNumber, 1):
        # Separate name so the total page count in `pageNumber` is not overwritten.
        page = str(i)

        url = "https://www.propertyguru.com.sg/property-for-sale/"+page+"?order=desc&property_type=N&property_type_code%5B0%5D=CONDO&property_type_code%5B1%5D=APT&property_type_code%5B2%5D=WALK&property_type_code%5B3%5D=CLUS&property_type_code%5B4%5D=EXCON&sort=date"
        driver.get(url)

        # `page_source` returns whatever is in the DOM right now, and an
        # implicit wait only applies to element lookups, so wait explicitly
        # for at least one listing card before taking the snapshot.
        try:
            WebDriverWait(driver, CARD_WAIT_SECONDS).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, '.listing-card'))
            )
        except TimeoutException:
            # No cards appeared (for example the site served a challenge page
            # instead of results). Stop instead of silently walking the
            # remaining pages without scraping anything.
            print('no listings found on page ' + page + ', stopping')
            break

        soup = BeautifulSoup(driver.page_source, 'lxml')

        for q in soup.select('.listing-card'):
            name = q.find(attrs={'class':'nav-link'})
            location = q.find(attrs={'itemprop':'streetAddress'})
            listprice = q.find(attrs={'class':'list-price pull-left'})
            listAgent = q.find(attrs={'class':'agent-name'})
            agentPhone = q.find(attrs={'class':'listing-agent-phone-number'})
            title = name.get('title') if name is not None else None

            # `find` returns None when an element is absent; skip such cards
            # rather than crashing the whole run with an AttributeError.
            required = (title, location, listprice, listAgent, agentPhone)
            if any(item is None for item in required):
                print('skipping incomplete listing on page ' + page)
                continue

            #FINAL VAR
            propName = title.replace('For Sale -', '')
            propLoc = location.text
            propPrice = listprice.text
            finalAgent = listAgent.text.replace('Listed by','')
            finalPhone = agentPhone.text

            mycursor.execute(INSERT_SQL,(propName,propLoc,propPrice,finalAgent,finalPhone))
            # Reported only after the statement has actually been executed.
            print('record inserted')

        # One commit per page keeps every fully scraped page durable without
        # paying for a transaction per row.
        mydb.commit()
finally:
    # quit() ends the WebDriver session and the chromedriver process, and it
    # runs even if the loop raised; close() would only close the window.
    driver.quit()



0 Answers0