I got this code from How to download a file using python in a 'smarter' way??
But it throws an error:
in download
r.close()
UnboundLocalError: local variable 'r' referenced before assignment
Also I would like to add a condition that the file to be downloaded should be pdf only.
import urllib2
import shutil
import urlparse
import os
def download(url, fileName=None):
def getFileName(url,openUrl):
if 'Content-Disposition' in openUrl.info():
# If the response has Content-Disposition, try to get filename from it
cd = dict(map(lambda x: x.strip().split('=') if '=' in x else (x.strip(),''),openUrl.info()['Content-Disposition'].split(';')))
if 'filename' in cd:
filename = cd['filename'].strip("\"'")
if filename: return filename
# if no filename was found above, parse it out of the final URL.
return os.path.basename(urlparse.urlsplit(openUrl.url)[2])
req = urllib2.Request(url)
try:
r = urllib2.urlopen(req)
except urllib2.HTTPError, e:
print e.fp.read()
try:
fileName = fileName or getFileName(url,r)
with open(fileName, 'wb') as f:
shutil.copyfileobj(r,f)
finally:
r.close()
download('http://www.altria.com/Documents/Altria_10Q_Filed10242013.pdf#?page=24')
This works completely fine with url : http://www.gao.gov/new.items/d04641.pdf So my question is why doesn't it work for some urls but works completely fine with urls like the one mentioned above.