I'm trying to create a web scraper for a website (https://pokemondb.net/pokedex/national) that copies a list of images and saves them in a directory. Everything seems to work, except that instead of picking up the 800+ items that I was hoping it would, it only picks up 12. I've tried using selenium's implicit_wait
, but it doesn't seem to work. I would like it to scrape every picture on the page.
Below is my code:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import shutil
import os
import requests
def spritescrape(driver):
sprites_list = driver.find_elements_by_tag_name('img')
sprite_srcs = [sprite.get_attribute('src') for sprite in sprites_list]
return sprite_srcs
def download_images(srcs, dirname):
for index, src in enumerate(srcs):
response = requests.get(src, stream=True)
save_image(response, dirname, index)
del response
def save_image(image, dirname, suffix):
with open('{dirname}/img_{suffix}.jpg'.format(dirname=dirname, suffix=suffix), 'wb') as out_file:
shutil.copyfileobj(image.raw, out_file)
def make_dir(dirname):
current_path = os.getcwd()
path = os.path.join(current_path, dirname)
if not os.path.exists(path):
os.makedirs(path)
if __name__ == '__main__':
chromeexe_path = r'C:\code\Learning Python\Scrapers\chromedriver.exe'
driver = webdriver.Chrome(executable_path=chromeexe_path)
driver.get(r'https://pokemondb.net/pokedex/national')
driver.implicitly_wait(10)
sprite_links = spritescrape(driver)
dirname = 'sprites'
make_dir(dirname)
download_images(sprite_links, dirname)
I've heard that some websites can be built in ways that prevent scraping, and I wonder if this is the case for this website. I'm very new to coding, so any help with getting all of the images would be greatly appreciated!