I'd like to crawl through Tesla's list of Superchargers and open each individual location page to record the number of connectors and the charging rates. This is one of my first programs, so I'm sure I'm doing a few things wrong, but I can't get past an HTTP Error 403 when I use urlopen to open multiple URLs (my one guess so far is sketched after the code). Any help would be greatly appreciated!
from urllib.request import urlopen
from bs4 import BeautifulSoup
import ssl
import csv
# Context that skips SSL certificate verification
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE
url = 'https://www.tesla.com/findus/list/superchargers/United%20States'
html = urlopen(url, context=ctx).read()
soup_main = BeautifulSoup(html, "html.parser")
data = []
for tag in soup_main('a'):
    # Default to '' so anchors without an href don't make the
    # 'in' test below raise a TypeError
    if '/findus/location/supercharger/' in tag.get('href', ''):
        url_sc = 'https://www.tesla.com' + tag['href']
        html_sc = urlopen(url_sc, context=ctx).read()
        soup_sc = BeautifulSoup(html_sc, "html.parser")
        address = soup_sc.find('span', class_='street-address').string
        # City and state are sliced out of the locality string by position
        locality = soup_sc.find('span', class_='locality').string
        city = locality[:-5]
        state = locality[-3:]
        details = soup_sc.find_all('p')[1].contents[-1]
        data.append([address, city, state, details])
header = ['Address', 'City', 'State', 'Details']
with open('datapull.csv', 'w', newline='') as fp:
    # newline='' keeps the csv module from writing blank rows on Windows
    writer = csv.writer(fp)
    writer.writerow(header)
    writer.writerows(data)
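
My working theory (unconfirmed) is that the site rejects urllib's default 'Python-urllib/x.y' User-Agent, which would explain the 403. Here's a minimal sketch of fetching with a browser-like User-Agent via urllib.request.Request; the fetch_html helper name and the header value are just my own placeholders, and the short sleep is only there to be polite between requests:

from urllib.request import Request, urlopen
import time

def fetch_html(url, ctx):
    # Send a browser-like User-Agent; plain urlopen identifies itself
    # as 'Python-urllib/x.y', which I suspect is what triggers the 403
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    time.sleep(1)  # small delay so the requests aren't back-to-back
    return urlopen(req, context=ctx).read()

If that guess is right, swapping fetch_html(...) in for the two urlopen(...).read() calls above should be all that's needed, but I haven't been able to confirm the cause.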