0

I have an application with the back-end written in Python that converts html files to pdf files. To do this it implements wkhtmltopdf (https://wkhtmltopdf.org/). It currently works perfectly for creating a single PDF file from an html file and outputs that to the user.

However, I need to be able to create multiple separate PDF files and then merge the files together into a single PDF.

I have been trying to do this using Pypdf2 with the PdfFileMerger() function (https://pythonhosted.org/PyPDF2/PdfFileMerger.html) and haven't been able to do it. I keep getting 'bytes' object has no attribute 'seek'

Here is my current code:

def multi_test_sheet(request, equipment_id):
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    from io import BytesIO
    from PyPDF2 import PdfFileReader, PdfFileMerger
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    equipment = Equipment.objects.filter(pk=equipment_id).first()
    if not job:
        raise Http404("test sheet error. Error code: get job failed")
    pdf_write = PdfFileWriter()
    user_properties=UserProperties.objects.get(user=request.user)
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": datetime.now().strftime("%b-%d-%Y %H:%M"),
        "now_date": datetime.now().date()
    }
    
    html_sheet = render_to_string('jobs/test_sheet_gear1.html', context)
    html_sheet2 = render_to_string('jobs/test_sheet_gear2.html', context)
    pdf_content1 = pdfkit.from_string(html_sheet, None) 
    pdf_content2 = pdfkit.from_string(html_sheet2, None) 
    pdfadder = PdfFileMerger(strict=False)
    pdfadder.append(pdf_content1)
    pdfadder.append(pdf_content2)
    pdf_adder.write("combined_sheets.pdf")

    response = HttpResponse(pdf_adder, content_type="application/pdf")
    
    response["Content-Disposition"] = f"filename={equipment.site_id}.pdf"

    return response
albertrw
  • 65
  • 1
  • 8
  • 1
    Somewhere a file-like object is expected but a bytes object is found. Investigate the traceback that comes along with the error message to find out where it comes from. – mkrieger1 Apr 06 '22 at 17:29
  • What are you using `BytesIO` for? – mkrieger1 Apr 06 '22 at 17:32
  • What are you using `pdf_write` for? – mkrieger1 Apr 06 '22 at 17:33
  • I found a stackoverflow article that indicated something about using BytesIO but it didn't work so I removed it but forgot to remove the variables. Article: https://stackoverflow.com/questions/60093581/python-docx-attributeerror-bytes-object-has-no-attribute-seek – albertrw Apr 06 '22 at 17:54

1 Answers1

0

I resolved this by hiring someone. The problem was that the objects being passed into the PyPDF2 function called PdfFileMerger() were not being recognized as pdf objects.

To resolve that, save the files (I place them in a folder called interim) using the second argument from the pdfkit.from_string() function, then assign the newly created files to independent variables using open() function, and finally proceed with the merging function by merging those variables.

def multi_test_sheet(request, equipment_id):
    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    from io import BytesIO
    from PyPDF2 import PdfFileReader, PdfFileMerger

    if not request.user.is_authenticated:
        return render(request, "jobs/login.html", {"message": None})
    equipment = Equipment.objects.filter(pk=equipment_id).first()
    if not job:
        raise Http404("test sheet error. Error code: get job failed")
    page_quantity = 2 #temporary value for a property that will be added to either equipment or test sheet model
    pdf_file_object = BytesIO()
    stream = BytesIO()
    pdf_write = PdfFileWriter()
    user_properties=UserProperties.objects.get(user=request.user)
    today = datetime.now()
    now=today.strftime("%b-%d-%Y %H:%M")
    now_date = today.date()
    context = {
        "equipment": equipment,
        "job": equipment.equipments,
        "test_sheet": equipment.sheet_eq,
        "user_properties": user_properties,
        "now": now,
        "now_date": now_date
    }
    
    html_sheet = render_to_string('jobs/test_sheet_gear1.html', context)
    html_sheet2 = render_to_string('jobs/test_sheet_gear2.html', context)
    pdf_content1 = pdfkit.from_string(html_sheet, 'interm/test_sheet_gear1.pdf') 
    pdf_content2 = pdfkit.from_string(html_sheet2, 'interm/test_sheet_gear2.pdf')     
    pdfadder = PdfFileMerger(strict=False)
    pdf1_v=PdfFileReader(open('interm/test_sheet_gear1.pdf', 'rb'))
    pdf2_v=PdfFileReader(open('interm/test_sheet_gear2.pdf', 'rb'))
    pdfadder.append(pdf1_v, import_bookmarks=False)
    pdfadder.append(pdf2_v, import_bookmarks=False)
    pdfadder.write('interm/'+str(user_properties.pk)+'combined_sheets.pdf')
    output_file = open('interm/combined_sheets.pdf', 'rb')
    response = HttpResponse(output_file, content_type="application/pdf")
    
    response["Content-Disposition"] = f"filename={equipment.site_id}.pdf"


    return response

albertrw
  • 65
  • 1
  • 8