0

I use this code to download JPG files without any problem. But, as you can see, the following page source includes a lot of images whose path is blank.gif.

<a href="/en/chowchow-puppy-sleeping-dogs-pet-448311/"><img src="/static/img/blank.gif"

My question: is it possible to add a detection step so that, when the src is blank.gif, the 640*426 image file is automatically downloaded from "https://pixabay.com/en/chowchow-puppy-sleeping-dogs-pet-448311/"? And how can I achieve this?

import random
import requests
from bs4 import BeautifulSoup

# got from http://stackoverflow.com/a/16696317
def download_file(url):
    """Stream the resource at *url* into a file in the current directory.

    The local file name is the last segment of the URL's path; any query
    string or fragment is ignored. When the path has no final segment
    (for example it ends with ``/``) the name ``"download"`` is used.

    Args:
        url: Absolute URL of the file to fetch.

    Returns:
        The name of the file that was written.
    """
    from urllib.parse import urlsplit

    # Derive the name from the path only, so "pic.jpg?w=640" is saved as
    # "pic.jpg", and never hand an empty name to open().
    local_filename = urlsplit(url).path.split('/')[-1] or "download"
    print("Downloading {} ---> {}".format(url, local_filename))
    # NOTE the stream=True parameter; the with-block releases the
    # connection once the body has been consumed (or on error).
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename

def Download_Image_from_Web(url):
    """Download every ``<img>`` referenced by the page at *url*.

    Each tag's ``src`` is resolved against *url* and passed to
    ``download_file``. Tags with no ``src``, or whose resolved address is
    not http(s), are skipped.

    Args:
        url: Address of the HTML page to scan for images.
    """
    from urllib.parse import urljoin

    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    for link in soup.find_all('img'):
        image_links = link.get('src')
        if not image_links:
            # Tag has no src attribute (or an empty one): nothing to fetch.
            continue
        # urljoin resolves root-relative ("/static/x.gif") and
        # protocol-relative ("//host/x.jpg") paths correctly and discards
        # the page's own query string; absolute URLs pass through as-is.
        image_links = urljoin(url, image_links)
        if not image_links.startswith(('http://', 'https://')):
            # e.g. inline "data:" images cannot be fetched over HTTP.
            continue
        download_file(image_links)

# Script entry: scan the Pixabay search page queried with "sleeping+puppy".
search_page = "https://pixabay.com/en/photos/?q=sleeping+puppy&hp=&image_type=&cat=&min_width=&min_height="
Download_Image_from_Web(search_page)
Eric
  • 732
  • 4
  • 13

1 Answer

2

Updated version. Read comments for additional info.

import random
import requests
from bs4 import BeautifulSoup

# got from http://stackoverflow.com/a/16696317
def download_file(url):
    """Stream the resource at *url* into a file in the current directory.

    The local file name is the last segment of the URL's path; any query
    string or fragment is ignored. When the path has no final segment
    (for example it ends with ``/``) the name ``"download"`` is used.

    Args:
        url: Absolute URL of the file to fetch.

    Returns:
        The name of the file that was written.
    """
    from urllib.parse import urlsplit

    # Derive the name from the path only, so "pic.jpg?w=640" is saved as
    # "pic.jpg", and never hand an empty name to open().
    local_filename = urlsplit(url).path.split('/')[-1] or "download"
    print("Downloading {} ---> {}".format(url, local_filename))
    # NOTE the stream=True parameter; the with-block releases the
    # connection once the body has been consumed (or on error).
    with requests.get(url, stream=True) as r:
        with open(local_filename, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    return local_filename

def Download_Image_from_Web(url):
    """Download every ``<img>`` referenced by the page at *url*.

    When a tag's ``src`` is missing or ends with ``blank.gif``, the
    ``data-lazy`` attribute is used instead. The chosen address is
    resolved against *url* and passed to ``download_file``. Tags that
    yield no address, or a non-http(s) one, are skipped.

    Args:
        url: Address of the HTML page to scan for images.
    """
    from urllib.parse import urljoin

    source_code = requests.get(url)
    plain_text = source_code.text
    soup = BeautifulSoup(plain_text, "html.parser")
    for link in soup.find_all('img'):
        image_links = link.get('src')
        if not image_links or image_links.endswith('blank.gif'):
            # Fall back to data-lazy when src is absent or is blank.gif.
            image_links = link.get('data-lazy')
        if not image_links:
            # Neither src nor data-lazy gave an address: nothing to fetch.
            continue
        # urljoin resolves root-relative ("/static/x.gif") and
        # protocol-relative ("//host/x.jpg") paths correctly and discards
        # the page's own query string; absolute URLs pass through as-is.
        image_links = urljoin(url, image_links)
        if not image_links.startswith(('http://', 'https://')):
            # e.g. inline "data:" images cannot be fetched over HTTP.
            continue
        download_file(image_links)

# Script entry: scan the Pixabay search page queried with "sleeping+puppy".
search_page = "https://pixabay.com/en/photos/?q=sleeping+puppy&hp=&image_type=&cat=&min_width=&min_height="
Download_Image_from_Web(search_page)
Roman Mindlin
  • 852
  • 1
  • 8
  • 12
  • May I ask why you put a specific URL in this function? And I should change the file to a different name while it is downloading, right? – Eric Sep 15 '17 at 12:36
  • Oh, it seems that I didn't read your question carefully. I thought you wanted to replace any blank.gif with this puppy JPEG. But now I've checked your URL one more time, and it looks like you can just use the 'data-lazy' attribute when 'src' contains 'blank.gif'. So, please check the updated code in my answer – Roman Mindlin Sep 15 '17 at 13:27
  • Fantastic. Really works with one more condition. thanks – Eric Sep 15 '17 at 18:14