I ran the code below. Most of it works, but when I ran the "for elm in collect" block, I got an error: HTTPError: HTTP Error 403: Forbidden. Can anyone help with this? Thanks!!
import requests
from bs4 import BeautifulSoup
import urllib.request
import os
# Fetch the Common Data Set index page; a browser-like User-Agent is needed
# because the site rejects Python's default UA.
resp = requests.get(
    'https://www.williams.edu/institutional-research/common-data-set/',
    headers={'User-Agent': 'Mozilla/5.0'},
)
soup = BeautifulSoup(resp.text, 'html5lib')

# Gather every list-item link, keeping only the hosted data-set files.
links = [a['href'] for a in soup.select('li a[href]')]
collect = [
    link
    for link in links
    if "https://www.williams.edu/institutional-research/files/" in link
]
def download_file(download_url):
    """Download one file from *download_url* into C:/Users/WM.

    The filename is taken from the last path segment of the URL.
    The request is sent with a browser-style User-Agent header —
    urllib's default ("Python-urllib/3.x") is what the server rejects
    with HTTP 403 Forbidden, so we mirror the header already used for
    the requests.get call above.
    """
    save_path = 'C:/Users/WM'
    file_name = download_url.split("/")[-1]
    complete_name = os.path.join(save_path, file_name)
    # Build an explicit Request so we can attach the User-Agent header.
    req = urllib.request.Request(
        download_url, headers={'User-Agent': 'Mozilla/5.0'}
    )
    # Context managers guarantee both the HTTP response and the output
    # file are closed even if the write fails partway through.
    with urllib.request.urlopen(req) as response:
        with open(complete_name, 'wb') as out_file:
            out_file.write(response.read())
    print("Completed")


def main():
    """Download every collected data-set URL."""
    # The original code redefined main()/download_file() inside the loop
    # and read the loop variable through a global; looping here instead
    # keeps the functions defined once and self-contained.
    for elm in collect:
        download_file(elm)  # elm is a URL.


if __name__ == "__main__":
    main()