When I try to scrape multiple pages of this website, I get no content in return. I usually check that all the lists
I'm creating are of equal length, but every one of them comes back with a length of 0.
I've used similar code to scrape other websites, so why does this code not work correctly?
Some solutions I've tried that haven't worked for my purposes: the requests.Session() approach
suggested in this answer, and the .json approach suggested here.
# Scrape the 2012 parliamentary result page of every constituency ID in
# 100..349 and append the extracted text to the result lists `can`, `pty_n`,
# `cv1`, `cvs1` and `vot1`, which must already exist before this loop runs.
RESULTS_URL = "https://www.ghanaweb.com/GhanaHomePage/election2012/parliament.constituency.php"

# NOTE(review): some servers answer the default python-requests User-Agent
# with an empty or blocked page. A browser-like value is sent as a precaution;
# confirm whether this site actually needs it.
REQUEST_HEADERS = {"User-Agent": "Mozilla/5.0"}

# Without a timeout a stalled connection would block the whole run forever;
# with it, requests raises a Timeout exception instead.
REQUEST_TIMEOUT_SECONDS = 30

for page_id in range(100, 350):
    # A separate name for the response keeps the loop counter intact.
    response = requests.get(
        RESULTS_URL,
        params={"ID": page_id, "res": "pm"},
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT_SECONDS,
    )
    response.encoding = response.apparent_encoding

    # A Response is falsy for 4xx/5xx status codes; skip those pages.
    if not response.ok:
        continue

    soup = BeautifulSoup(response.text, 'html.parser')
    # NOTE(review): if every list stays empty, check that response.text really
    # contains <tbody> elements. Browsers may insert them while rendering, so
    # they can be visible in dev tools without being in the served markup.
    ghana_tbody = soup.find_all('tbody')

    # Random pause between requests to avoid hammering the server.
    sleep(randint(2, 10))

    for container in ghana_tbody:
        #### CANDIDATES ####
        for candidate_block in container.find_all('div', class_='can par'):
            heading = candidate_block.find('h4')
            # find() returns None when there is no <h4>; check before use.
            if heading is None:
                continue
            # One entry per candidate, so this list stays aligned with the
            # per-row lists below.
            can.append(heading.get_text())

        #### PARTY NAMES ####
        # find_all() always returns a list (possibly empty), so no None check
        # is needed for the remaining sections.
        pty_n.extend(tag.get_text() for tag in container.find_all('h5'))

        #### CANDIDATE VOTES ####
        cv1.extend(
            cell.get_text()
            for cell in container.find_all('td', class_='votes')
        )

        #### CANDIDATE VOTE SHARE ####
        cvs1.extend(
            cell.get_text()
            for cell in container.find_all('td', class_='percent')
        )

    #### TOTAL VOTES ####
    # Repeat the page's total so that `vot1` grows to the length of `pty_n`.
    # NOTE(review): the original indentation was lost; this assumes the padding
    # runs once per total cell. Repeating a list a non-positive number of times
    # yields [], so once `vot1` has caught up later cells add nothing.
    for footer in soup.find_all('tr', class_='total'):
        for total_cell in footer.find_all('td', class_='votes'):
            missing = len(pty_n) - len(vot1)
            vot1.extend([total_cell.get_text()] * missing)
Thanks in advance for your help!