I am building a Beautiful Soup script that scrapes a news website. One of the strings it scrapes is an image URL (img_src). Is there a way to download the images that are scraped?
I am using Python 2.7.16. Here is the script:
# -*- coding: utf-8 -*-
from bs4 import BeautifulSoup
import requests
import time
import csv
source = requests.get('https://jornalnoticias.co.mz/index.php/desporto').text
soup = BeautifulSoup(source, 'lxml')
#prepare csv file
csv_file = open('jornalnoticias.csv', 'a')
csv_writer = csv.writer(csv_file)
csv_writer.writerow(['headline', 'url_src', 'img_src', 'news_src', 'cat', 'epoch'])
news_src = 'Jornal Notícias'
cat = 'Desporto'
epoch = time.time()
#loop over articles
for article in soup.find_all('div', itemprop='blogPost'):
    try:
        headline = article.h2.a.text.replace('\t', '').encode('utf8')
    except Exception as e:
        headline = None
    try:
        url_src = 'https://jornalnoticias.co.mz' + article.find('a', href=True)['href']
    except Exception as e:
        url_src = None
    try:
        img_src = 'https://jornalnoticias.co.mz' + article.find('a', class_='hover-zoom')['href']
    except Exception as e:
        img_src = None
    print(headline)
    print(url_src)
    print(img_src)
    # write csv
    csv_writer.writerow([headline, url_src, img_src, news_src, cat, epoch])
csv_file.close()
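
For what it's worth, this is roughly what I had in mind for saving each image, using requests to stream the file to disk. The images folder and the filename-from-URL logic are just my guesses, not something I have working:

import os

def download_image(img_src, folder='images'):
    # skip articles where no image link was found
    if img_src is None:
        return
    # create the output folder on first use (my assumption about where to save)
    if not os.path.exists(folder):
        os.makedirs(folder)
    # guess a filename from the last segment of the URL
    filename = img_src.split('/')[-1] or 'image.jpg'
    response = requests.get(img_src, stream=True)
    if response.status_code == 200:
        with open(os.path.join(folder, filename), 'wb') as f:
            for chunk in response.iter_content(1024):
                f.write(chunk)

Would calling something like download_image(img_src) just before the csv_writer.writerow line inside the loop be the right approach, or is there a better way?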