I'm using the following code to change the user-agent string. Will it change the user-agent string for each and every browser.get request?
# Pool of desktop-browser user-agent strings (Chrome, Firefox, Safari).
ua_strings = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0',
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/11.1.1 Safari/605.1.15',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
    ...,  # placeholder: further entries were elided here
]
def parse(self, response):
    """Visit the start URL in headless Firefox, then every card link found on it.

    One user-agent string is drawn at random from ``ua_strings`` and written
    to the Firefox profile before the browser is launched. Because the draw
    happens once per call, every ``browser.get`` issued through this browser
    instance shares that same override.

    Args:
        response: Unused by this method; the page is fetched through
            Selenium from ``self.start_urls[0]`` instead.
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference('general.useragent.override', random.choice(ua_strings))
    options = Options()
    options.add_argument('-headless')
    browser = webdriver.Firefox(profile, firefox_options=options)
    try:
        browser.get(self.start_urls[0])
        # Wait up to 60 seconds for the card links to become visible.
        hrefs = WebDriverWait(browser, 60).until(
            EC.visibility_of_all_elements_located(
                (By.XPATH, '//div[@class="discoverableCard"]/a')
            )
        )
        # Extract the URLs up front: the element handles belong to the
        # current page and should not be relied on after navigating away.
        pages = [href.get_attribute('href') for href in hrefs]
        for page in pages:
            browser.get(page)
            # scrape page
    finally:
        # Always shut the browser down, even if the wait times out or a
        # navigation fails, so no headless Firefox process is left behind.
        browser.quit()
Or will I have to call browser.close() after each page
and then create a new browser instance
in order to use a new user-agent string for each request?
for page in pages:
    # The override is stored in the profile the browser is launched with, so
    # reusing one profile would repeat the same user agent on every page.
    # Build a fresh profile with a newly drawn string for each browser.
    profile = webdriver.FirefoxProfile()
    profile.set_preference('general.useragent.override', random.choice(ua_strings))
    browser = webdriver.Firefox(profile, firefox_options=options)
    try:
        browser.get(page)
        # scrape page
    finally:
        # Release this browser before starting the next one, even on error.
        browser.quit()