I have two loops Id like to integrate into my script but not sure how as I am still learning all this.
Loop 1) An outside list of book authors; remove this from the code. The list will get longer over time so moving it to an outside file would be helpful. How can I reference a list and add a variable in the code to run through each author in the search?
Loop 2) Im having to repeat my code for each author at this stage. How can I write it once and have it repeat once the search is done for the next author.
*The end goal here is to export the search in HTML format so it easily read through.
Thanks!
from bs4 import BeautifulSoup
import urllib.request
import time
#Loop 1
var1 = 'Stephen%20King'
var2 = 'J.%20K.%20Rowling'
var3 = 'James%20Patterson'
var4 = 'John%20Grisham'
timestr = time.strftime("%m-%d-%Y")
#Loop 2
file = open('/var/script/exp/exp_' + timestr + '.html', 'a+')
with open('/var/script/exp/exp_' + timestr + '.html', 'a') as file_1, open('/var/script/src/header.html', 'r') as file_2:
for line in file_2:
file_1.write(line)
with open('/var/script/exp/exp_' + timestr + '.html', 'a') as file_1, open('/var/script/src/subheader.html', 'r') as file_2:
for line in file_2:
file_1.write(line)
for i in range(5):
url = 'https://www.example.com/Listings?st=' + var1 + '&sg=&c=&s=&lp=0&hp=999999&p={}'.format(i)
source = urllib.request.urlopen(url)
soup = BeautifulSoup(source, 'html.parser')
for products in soup.find_all('li', class_='widget'):
image = products.find('img', class_='lazy-load')
itemurl = products.find('a', class_='product')
title = products.find('div', class_='title').text
countdown = products.find(class_='product-countdown')
price = products.find(class_='product-price').find(class_="price").text
file = open('/var/script/exp/exp_' + timestr + '.html', 'a+')
file.write('<div class="col-md-15 col-xs-3">')
file.write('<div class="card mb-4 box-shadow">')
file.write('<img class="card-img-top" src="')
file.write(image.get('data-src'))
file.write('" alt="Card image cap" height="200px">')
file.write('<div class="card-body">')
file.write('<div><p class="card-text"><a href="https://www.example.com' + itemurl.get('href')+'" target="_blank">' + title + '</a>' + price + '</p>')
file.write('<div class="d-flex justify-content-between align-items-center">')
file.write('<div class="btn-group">')
file.write('<button type="button" class="btn btn-sm btn-outline-secondary"><a href="https://www.example.com' + itemurl.get('href')+'" target="_blank">View</a></button>')
file.write('</div><small class="text-muted">')
file.write(countdown.get('data-countdown'))
file.write('</small></div></div></div></div></div>')
print
file.close()
print(var1)
#Repeated Code
file = open('/var/script/exp/exp_' + timestr + '.html', 'a+')
with open('/var/script/exp/exp_' + timestr + '.html', 'a') as file_1, open('/var/script/src/subheader.html', 'r') as file_2:
for line in file_2:
file_1.write(line)
for i in range(5):
url = 'https://www.example.com/Listings?st=' + var2 + '&sg=&c=&s=&lp=0&hp=999999&p={}'.format(i)
source = urllib.request.urlopen(url)
soup = BeautifulSoup(source, 'html.parser')
for products in soup.find_all('li', class_='widget'):
image = products.find('img', class_='lazy-load')
itemurl = products.find('a', class_='product')
title = products.find('div', class_='title').text
countdown = products.find(class_='product-countdown')
price = products.find(class_='product-price').find(class_="price").text
#print(image.get('data-src'))
#file.write('<img src="', + image.get('data-src'), + '">')
file = open('/var/script/exp/exp_' + timestr + '.html', 'a+')
file.write('<div class="col-md-15 col-xs-3">')
file.write('<div class="card mb-4 box-shadow">')
file.write('<img class="card-img-top" src="')
file.write(image.get('data-src'))
file.write('" alt="Card image cap" height="200px">')
file.write('<div class="card-body">')
file.write('<div><p class="card-text"><a href="https://www.example.com' + itemurl.get('href')+'" target="_blank">' + title + '</a>' + price + '</p>')
file.write('<div class="d-flex justify-content-between align-items-center">')
file.write('<div class="btn-group">')
file.write('<button type="button" class="btn btn-sm btn-outline-secondary"><a href="https://www.example.com' + itemurl.get('href')+'" target="_blank">View</a></button>')
file.write('</div><small class="text-muted">')
file.write(countdown.get('data-countdown'))
file.write('</small></div></div></div></div></div>')
print
file.close()
print(var2)
Thanks so much for the response DoubleDouble So here is the updated code where I was able to use a author list, man im surprised it got this!! LOL
from bs4 import BeautifulSoup
import urllib.request
import time
for i in range(5): #searches through pages
lines = open('C:\\Users\\ataylor_dev\\Documents\\VSCODE\\Python\\BeautifulSoup\\Training\\authors.txt').read().splitlines()
for author in lines:
url = 'https://www.example.com/Listings?st=' + author + '&sg=&p={}'.format(i) #adds authors and pages to
print(url)
#how to repeat code with next author
Output:
https://www.example.com/Listings?st=Stephen%20King&sg=&c=&s=&lp=0&hp=999999&p=0
https://www.example.com/Listings?st=J.%20K.%20Rowling&sg=&c=&s=&lp=0&hp=999999&p=0
https://www.example.com/Listings?st=James%20Patterson&sg=&c=&s=&lp=0&hp=999999&p=0
https://www.example.com/Listings?st=John%20Grisham&sg=&c=&s=&lp=0&hp=999999&p=0
John%20Grisham
https://www.example.com/Listings?st=Stephen%20King&sg=&c=&s=&lp=0&hp=999999&p=1
https://www.example.com/Listings?st=J.%20K.%20Rowling&sg=&c=&s=&lp=0&hp=999999&p=1
https://www.example.com/Listings?st=James%20Patterson&sg=&c=&s=&lp=0&hp=999999&p=1
https://www.example.com/Listings?st=John%20Grisham&sg=&c=&s=&lp=0&hp=999999&p=1
John%20Grisham
https://www.example.com/Listings?st=Stephen%20King&sg=&c=&s=&lp=0&hp=999999&p=2
https://www.example.com/Listings?st=J.%20K.%20Rowling&sg=&c=&s=&lp=0&hp=999999&p=2
https://www.example.com/Listings?st=James%20Patterson&sg=&c=&s=&lp=0&hp=999999&p=2
https://www.example.com/Listings?st=John%20Grisham&sg=&c=&s=&lp=0&hp=999999&p=2
John%20Grisham
https://www.example.com/Listings?st=Stephen%20King&sg=&c=&s=&lp=0&hp=999999&p=3
https://www.example.com/Listings?st=J.%20K.%20Rowling&sg=&c=&s=&lp=0&hp=999999&p=3
https://www.example.com/Listings?st=James%20Patterson&sg=&c=&s=&lp=0&hp=999999&p=3
https://www.example.com/Listings?st=John%20Grisham&sg=&c=&s=&lp=0&hp=999999&p=3
John%20Grisham
https://www.example.com/Listings?st=Stephen%20King&sg=&c=&s=&lp=0&hp=999999&p=4
https://www.example.com/Listings?st=J.%20K.%20Rowling&sg=&c=&s=&lp=0&hp=999999&p=4
https://www.example.com/Listings?st=James%20Patterson&sg=&c=&s=&lp=0&hp=999999&p=4
https://www.example.com/Listings?st=John%20Grisham&sg=&c=&s=&lp=0&hp=999999&p=4
John%20Grisham
How do I now repeat the code in the right order? Stephen%20King pages 1 - 5, then on to the next author.. pages 1 - 5.
I feel like im getting pretty darn close, Thanks again!