I'm trying to scrape 100 reviews from a movie on Imdb using Selenium. The first page of the reviews scrapes just fine but after the program clicks on "load more", the program scrapes all the data from the beginning resulting in duplicates. I believe this is because the next set of reviews continue along the same page as opposed to opening a new review page. I would appreciate your help in figuring this out. Here is my code which is constructed inside a function:
global movie_title, review_name
movie_title = []
review_name = []
wdpath = 'chromedriver.exe'
driver = webdriver.Chrome(wdpath)
driver.get(url)
driver.maximize_window()
while True:
driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
reviews = driver.find_elements_by_css_selector('[class*= "lister-item mode-detail imdb-user-review"]')
for review in reviews:
try:
username = review.find_element_by_css_selector('[class = "display-name-link"]').text
review_name.append(username)
except:
username = None
try:
title = review.find_element_by_css_selector('[class = "title"]').text
movie_title.append(title)
except:
title = None
try:
rating = review.find_element_by_css_selector('.rating-other-user-rating span')
star_rating.append(rating.text)
if len(star_rating) == 100:
break
except:
star_rating.append('No rating')
if len(star_rating) == 100:
break
nextbutton = WebDriverWait(driver,5).until(EC.presence_of_element_located((By.CLASS_NAME,'ipl-load-more__button')))
if len(star_rating) == 100:
break
nextbutton.click()
time.sleep(2)
driver.close()