3

I am trying to scrape this page, https://www.elcorteingles.es/supermercado/alimentacion-general/, but every time I run the spider the browser either doesn't load the page or reports that the website can't be reached. How can I fix this problem?

class SuperSpider(scrapy.Spider):
    """Spider that renders the supermarket landing page in headless Chrome.

    On construction the spider opens the start page in a Selenium-driven
    Chrome instance, waits for it to finish rendering, and stores the
    resulting markup in ``self.html``.
    """

    name = 'super'
    # Entries here are domain names only; a value that includes a URL path
    # (such as ".../supermercado") is not a valid domain for offsite filtering.
    allowed_domains = ['www.elcorteingles.es']
    start_urls = ['https://www.elcorteingles.es/supermercado/alimentacion-general/']

    def __init__(self, *args, **kwargs):
        """Fetch the rendered start page with Selenium and keep its HTML.

        Args:
            *args: Positional arguments forwarded to ``scrapy.Spider``.
            **kwargs: Keyword arguments forwarded to ``scrapy.Spider``.
        """
        super().__init__(*args, **kwargs)

        chrome_options = Options()
        chrome_options.add_argument("--headless")
        chrome_path = which("chromedriver")
        # The options object must be handed to the driver; otherwise the
        # "--headless" flag configured above is silently ignored.
        driver = webdriver.Chrome(executable_path=chrome_path, options=chrome_options)
        try:
            driver.get("https://www.elcorteingles.es/supermercado/alimentacion-general/")
            driver.maximize_window()
            # Fixed pause to give the page time to finish rendering.
            time.sleep(25)
            self.html = driver.page_source
        finally:
            # quit() ends the whole browser session, even when loading failed,
            # so no Chrome/chromedriver process is left behind.
            driver.quit()

    def parse(self, response):
        """Callback for downloaded responses; no extraction is implemented yet."""
        pass

1 Answer

2
from fake_useragent import UserAgent

# Pick one random user-agent string and reuse it for the whole session.
ua = UserAgent()
user_agent = ua.random
print(user_agent)
options.add_argument(f'user-agent={user_agent}')

# Turn off the Blink "AutomationControlled" feature for this browser session.
options.add_argument('--disable-blink-features=AutomationControlled')

options.add_argument('--headless')
# Headless Chrome has no real window to maximize, so set the size explicitly.
options.add_argument("--window-size=1920,1080")
# your code: create `driver` with these `options` and call driver.get(...)
time.sleep(30)
print(driver.page_source)

This should bypass the bot detection, but beware that driver.page_source is huge.

Arundeep Chohan
  • 9,779
  • 5
  • 15
  • 32