I'm trying to loop through six Wikipedia pages to get a list of every song linked. It gives me this error when I run it in Terminal:
Traceback (most recent call last):
File "scrapeproject.py", line 31, in <module>
print (getTableLinks(my_url))
File "scrapeproject.py", line 20, in getTableLinks
html = urlopen(my_url)
File "/Users/adriana/Software/Python-3.5.1/mybuild/lib/python3.5/urllib/request.py", line 162, in urlopen
return opener.open(url, data, timeout)
File "/Users/adriana/Software/Python-3.5.1/mybuild/lib/python3.5/urllib/request.py", line 456, in open
req.timeout = timeout
AttributeError: 'NoneType' object has no attribute 'timeout'
I think this happens because a `None` keeps showing up when I print the song list. Does anyone have any suggestions?
Code:
from urllib.request import urlopen
from bs4 import BeautifulSoup
import sys
import http.client
main = "https://en.wikipedia.org/wiki/Billboard_Year-End_Hot_100_singles_of_"
year = 2009
def createUrl(main, year):
for i in range(0, 6): # increment years so i can get each link
year += 1
print ("\n\n", year, "\n\n")
fullUrl = main + str(year)
return fullUrl
my_url = createUrl(main, year) # this is how i make createUrl a variable to be used in other functions
def getTableLinks(my_url): # there is a random none appearing in my code
# i think the problem is between here...
html = urlopen(my_url)
bsObj = BeautifulSoup(html.read(), "html.parser")
tabledata = bsObj.find("table", {"class":"wikitable"}).find_all("tr")
# ...and here
for table in tabledata:
try:
links = table.find("a")
if 'href' in links.attrs:
print (links.attrs['href'])
except:
pass
print (getTableLinks(my_url))