I am using below codes to download online PDF files. It works fine for most files.
# -*- coding: utf8 -*-
import urllib2
import shutil
import urlparse
import os
def download(url, fileName=None):
def getFileName(url,openUrl):
if 'Content-Disposition' in openUrl.info():
cd = dict(map(
lambda x: x.strip().split('=') if '=' in x else (x.strip(),''),
openUrl.info()['Content-Disposition'].split('')))
if 'filename' in cd:
filename = cd['filename'].strip("\"'")
if filename: return filename
return os.path.basename(urlparse.urlsplit(openUrl.url)[2])
r = urllib2.urlopen(urllib2.Request(url))
try:
fileName = fileName or getFileName(url,r)
with open(fileName, 'wb') as f:
shutil.copyfileobj(r,f)
finally:
r.close()
however for some files with special characters in the address, for example:
download(u'http://www.poemhunter.com/i/ebooks/pdf/aogán_ó_rathaille_2012_5.pdf', 'c:\\the_file.pdf')
it give a Unicode error:
UnicodeEncodeError: 'ascii' codec can't encode character u'\xe1' in position 21: ordinal not in range(128)
How can I solve this problem? Thanks.