I'm getting the following unicode encoding error .
When I'm running the program presented below, I'm getting a unicode encoding-related error
import bs4
import requests
from xhtml2pdf import pisa # import python module
from xhtml2pdf.config.httpconfig import httpConfig
res = requests.get("https://www.insightsonindia.com/2018/06/04/insights-daily-current-affairs-04-june-2018/")
soup = bs4.BeautifulSoup(res.text, 'lxml')
pf = soup.find("div", class_="pf-content")
sourceHtml =str(pf)
outputFilename = "test.pdf"
def convertHtmlToPdf(sourceHtml, outputFilename):
# open output file for writing (truncated binary)
httpConfig.save_keys('nosslcheck', True)
resultFile = open(outputFilename, "w+b")
# convert HTML to PDF
pisaStatus = pisa.CreatePDF(sourceHtml, dest=resultFile, encoding="utf-8")
# close output file
resultFile.close() # close output file
# return True on success and False on errors
return pisaStatus.err
# Main program
if __name__ == "__main__":
pisa.showLogging()
convertHtmlToPdf(sourceHtml, outputFilename)
The error is given below
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode character '\u2019' in position 37: ordinal not in range(128)
I'm trying to download a portion of a website using xhtml2pdf. To do that I used bs4 and scrape the site and store it. Then save it into pdf by using xhtml2pdf. Most of the time it worked like charm. But for this instance it is giving me error. Link to the full code in github is given below
Link to full code is available here
xhtml2pdf is encoding with ascii, Since my html file contain non ascii characters it is showing error. And I don't know how to change the encoder in xhtml2pdf. Omitting non-ascii character is not not an option. If I ignore it then link to the image will be corrupted and image will not show in pdf.
complete traceback
```Traceback (most recent call last):
File "test3.py", line 80, in
convertHtmlToPdf(sourceHtml, outputFilename)
File "test3.py", line 68, in convertHtmlToPdf
pisaStatus = pisa.CreatePDF(sourceHtml, dest=resultFile, encoding= 'utf-8')
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\document.py", line 97, in pisaDocument
encoding, context=context, xml_output=xml_output)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\document.py", line 59, in pisaStory
pisaParser(src, context, default_css, xhtml, encoding, xml_output)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 759, in pisaParser
pisaLoop(document, context)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 700, in pisaLoop
pisaLoop(node, context, path, **kw)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 644, in pisaLoop
pisaLoop(nnode, context, path, **kw)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 644, in pisaLoop
pisaLoop(nnode, context, path, **kw)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 644, in pisaLoop
pisaLoop(nnode, context, path, **kw)
[Previous line repeated 2 more times]
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 514, in pisaLoop
attr = pisaGetAttributes(context, node.tagName, node.attributes)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\parser.py", line 124, in pisaGetAttributes
nv = c.getFile(nv)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\context.py", line 818, in getFile
return getFile(name, relative or self.pathDirectory)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\util.py", line 738, in getFile
file = pisaFileObject(*a, **kw)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\site-packages\xhtml2pdf\util.py", line 644, in init
conn.request("GET", path)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 1229, in request
self._send_request(method, url, body, headers, encode_chunked)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 1240, in _send_request
self.putrequest(method, url, **skips)
File "C:\Users\Ananthu\AppData\Local\Programs\Python\Python37-32\lib\http\client.py", line 1107, in putrequest
self._output(request.encode('ascii'))
UnicodeEncodeError: 'ascii' codec can't encode character '\u2019' in position 37: ordinal not in range(128)