I have adapted code from this answer and this answer, and came up with the following solution:
import sys
import pathlib
from io import BytesIO
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import (
BooleanObject, NameObject, NumberObject, IndirectObject
)
def flatten_pdf(pdf_bytes: bytes) -> BytesIO:
"""Flatten a PDF, converting editable fields to non-editable."""
pdf = PdfFileReader(stream=BytesIO(initial_bytes=pdf_bytes))
if '/AcroForm' in pdf.trailer['/Root']:
pdf.trailer['/Root']['/AcroForm'].update({
NameObject('/NeedAppearances'): BooleanObject(True)
})
pdf_writer = PdfFileWriter()
# pylint: disable=protected-access
catalog = pdf_writer._root_object
if '/AcroForm' not in catalog:
pdf_writer._root_object.update({
NameObject('/AcroForm'):
IndirectObject(len(pdf_writer._objects), 0, pdf_writer)
})
pdf_writer._root_object['/AcroForm'][NameObject('/NeedAppearances')] = \
BooleanObject(True)
for page_index in range(0, len(pdf.pages)):
pdf_writer.addPage(pdf.getPage(page_index))
writer_page = pdf_writer.getPage(page_index)
for annotation_index in range(0, len(writer_page['/Annots'])):
writer_annot = writer_page['/Annots'][annotation_index].getObject()
writer_annot.update({NameObject('/Ff'): NumberObject(1)})
output_stream = BytesIO()
pdf_writer.write(output_stream)
return output_stream
if __name__ == "__main__":
pdf_bytes = pathlib.Path(sys.argv[1]).read_bytes()
flatten_output = flatten_pdf(pdf_bytes=pdf_bytes)
with open('output.pdf', 'wb') as f:
f.write(flatten_output.getbuffer())
This works for text-fillable fields and makes them uneditable, but the same is not true for other form types like checkboxes and radio buttons. How can I modify my code to preserve the selections in the input PDF and make the other form types non-editable yet preserve the entered information?