My goal is to convert all html codes embedded in my csv file and convert those to pdf form. But i am having some issue on other html codes and it cannot proceed with conversion to pdf. Here's the current code:
import csv
import os
import pdfkitc #imported pdfkit for html to pdf conversion
# path to the raw CSV file
csv_file_path = r"C:\Users\jdayao\Desktop\html_to_pdf\emails_body_202303211635.csv"
# stored directory here the PDF files will be dumped
pdf_dir_path = r"C:\Users\jdayao\Desktop\sample_html"
# to ensure the PDF directory exists
os.makedirs(pdf_dir_path, exist_ok=True)
options = { 'page-height': '1500.00', 'page-width': '210.00', 'encoding': 'utf8'}
config = pdfkit.configuration(wkhtmltopdf = r"C:\Program
Files\wkhtmltopdf\bin\wkhtmltopdf.exe")
with open(csv_file_path, newline='', encoding = "utf8") as csv_file:
reader = csv.reader(csv_file)
header = next(reader) # Skip the header row
for row in reader:
html_code = row[2]
# this will generate a unique filename for the HTML and PDF files
filename_base = f'{header[2]}_{row[0]}'
html_file_path = f'{pdf_dir_path}\{filename_base}.html'
pdf_file_path = f'{pdf_dir_path}\{filename_base}.pdf'
# Write the HTML code to the HTML file
with open(html_file_path, 'w') as html_file:
html_file.write(html_code)
# added try/except for the meantime to handle converting errors on some rows/html codes
try:
pdfkit.from_file(html_file_path, pdf_file_path, configuration = config, options = options) # include configuration = config argument
print(f'Converted {html_file_path} to {pdf_file_path}') # to indicate if conversion is successful
except:
print(f'Converted {html_file_path} to {pdf_file_path} but with some error in wkhtmltopdf') # to indicate if conversion is not successful
print('Conversion complete.') # to indicate conversion completion
Error:
Traceback (most recent call last):
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\jdayao\appdata\local\programs\python\python311\lib\site-packages\untitled6.py:52
html_file.write(html_code)
File ~\AppData\Local\Programs\Python\Python311\Lib\encodings\cp1252.py:19 in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u202f' in position 3929: character maps to <undefined>
Now having this traceback error:
Traceback (most recent call last):
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\spyder_kernels\py3compat.py:356 in compat_exec
exec(code, globals, locals)
File c:\users\jdayao\appdata\local\programs\python\python311\lib\site-packages\untitled11.py:64
pdfkit.from_file(html_file_path, pdf_file_path, configuration = config, options = options) # include configuration = config argument
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdfkit\api.py:51 in from_file
return r.to_pdf(output_path)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdfkit\pdfkit.py:201 in to_pdf
self.handle_error(exit_code, stderr)
File ~\AppData\Local\Programs\Python\Python311\Lib\site-packages\pdfkit\pdfkit.py:155 in handle_error
raise IOError('wkhtmltopdf reported an error:\n' + stderr)
OSError: wkhtmltopdf reported an error:
Exit with code 1 due to network error: ProtocolUnknownError