I've tried to scrape the page based on your code and ended up with success.
I've decided to scroll the page by 500px per step and then remove all duplications and empty strings.
import selenium
import time
from selenium import webdriver
path = "./chromedriver"
driver = webdriver.Chrome(executable_path=path)
url = 'https://www.deezer.com/fr/playlist/2560242784'
driver.get(url)
all_music = []
last_scroll_y = driver.execute_script("return window.scrollY")
for i in range(0, 100):
try :
#first scrape
musics = driver.find_elements_by_class_name('BT3T6')
for music in musics:
all_music.append(music.text)
#then scroll down +500px
driver.execute_script("window.scrollTo(0, window.scrollY+500);")
time.sleep(0.2) #some wait for the new content (200ms)
current_scroll_y = driver.execute_script("return window.scrollY")
# exit the loop if the page is not scrolled any more
if current_scroll_y == last_scroll_y:
break
last_scroll_y = current_scroll_y
except Exception as e:
print(e)
# this removes all empty strings
all_music = list(filter(None, all_music))
# this removes all duplications, but keeps the order
# based on https://stackoverflow.com/a/17016257/5226491
# python 3.7 required
all_music = list(dict.fromkeys(all_music))
# this also removes all duplications, but the order will be changed
#all_music = list(set(all_music))
for m in all_music:
print(m)
print('Total music found: ' + len(all_music))
This works ~ 60-90 seconds and scrape 1000+ items.
Note: it works fine with the active window, and also works in headless mode, but it finish scraping when I collapse the browser window.. So run this with headless
chrome option
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.headless = True
driver = webdriver.Chrome(CHROMEDRIVER_PATH, options=options)
or do not collapse the window.