I am trying getting my error here
from PyPDF4.generic import ByteStringObject
pdf_reader = PyPDF4.PdfFileReader(pdf_file)
page = pdf_reader.pages[0]
page.mergePage(pdf_reader.pages[0])
content = page['/Contents'].getObject()
content = re.sub(b"/Tx BMC", "/Tx BMC BT /F1 12 Tf 1 0 0 1 50 50 Tm ({}) Tj ET".format(label).encode("utf8"), content.getData()).decode('latin-1')
content_object = ByteStringObject(content.encode('latin-1'))
page['/Contents'] = content_object
the error i am getting is
Traceback (most recent call last):
File "D:\projects\work\pdfProject\main.py", line 48, in <module>
page['/Contents'] = content_object
~~~~^^^^^^^^^^^^^
File "C:\Users\Ammar\AppData\Local\Programs\Python\Python311\Lib\site-packages\PyPDF4\generic.py", line 505, in __setitem__
raise ValueError("key must be PdfObject")
ValueError: key must be PdfObject
here is methodology which i tried Methodology
- OPEN PDF FOREGROUND. FOR EXAMPLE "PLAN 02 ELEMENTS.pdf"
- TAKE COORDINATES OF THE LIMITS OF THE PLANE
- OPEN EXCEL FILE "EXPORT"
- SCROLL THE ROWS OF THE EXCEL "EXPORT" FILE AND IF COLUMN B HAS NO DATA GO TO THE NEXT ROW IF COLUMN B HAS ANY DATA CHECK IF THE COORDINATES OF COLUMN "D" AND "E" ARE IN THE PLANE PDF.
- CHECK IF THE COORDINATES OF THE ROW ARE WITHIN THE LIMITS OF THE PDF PLANE. IF THEY ARE NOT ON THE PLANE, GO TO THE NEXT ROW. IF THEY ARE ON THE MAP, PUT THE LABEL OF COLUMN "J" IN THE PDF MAP.
- REPEAT STEPS 4, 5 UNTIL YOU REACH THE LAST ROW OF THE EXCEL.
- OPEN NEXT PLANE AND REPEAT STEPS 2, 3, 4, 5, 6.
NOTE: THE COORDINATES EXPRESSED IN COLUMNS "D" AND "E" ARE EXPRESSED IN UTM COORDINATES. FOR WHICH THE TIME ZONE OF SAID COORDINATES MUST BE KNOWN. THE TIME ZONE OF THE SAMPLE "EXPORT" FILE IS 30.
import openpyxl
import PyPDF4
from pyproj import Transformer
import pyproj
from PyPDF4.generic import ByteStringObject
from PyPDF4.pdf import ContentStream
from PyPDF4.generic import NameObject, NumberObject, ByteStringObject
import re
utm_zone = '30N'
pdf_file_name = 'geo.pdf'
pdf_file = open(pdf_file_name, 'rb')
pdf_reader = PyPDF4.PdfFileReader(pdf_file)
page = pdf_reader.pages[0]
llx, lly = page.trimBox.lowerLeft
urx, ury = page.trimBox.upperRight
# print(llx,lly,urx,ury)
utm_zone = '30N'
crs_from = pyproj.CRS('EPSG:32630')
transformer = Transformer.from_crs(crs_from, 'EPSG:4326')
excel_file_name = 'EXPORT.xlsx'
workbook = openpyxl.load_workbook(excel_file_name)
worksheet = workbook.active
for row in worksheet.iter_rows(min_row=2):
# print(row[1].value)
if row[1].value is None:
continue
easting, northing = float(row[3].value.replace(',', '.')), float(row[4].value.replace(',', '.'))
transformer_lat_lon = Transformer.from_crs("EPSG:32630", "EPSG:4326")
lon, lat = transformer_lat_lon.transform(easting, northing)
# print(easting,northing)
# print(lon,lat)
# print(llx,easting,urx)
if llx <= easting and lly <= northing:
x, y = transformer.transform(easting, northing)
print(x,y)
# print(x,y)
# print("arham")
print(page.mediaBox.upperRight[1])
if 0 <= x <= page.mediaBox.upperRight[0]:
label = row[9].value
# print(label)
page.mergePage(pdf_reader.pages[0])
content = page['/Contents'].getObject()
content = re.sub(b"/Tx BMC", "/Tx BMC BT /F1 12 Tf 1 0 0 1 50 50 Tm ({}) Tj ET".format(label).encode("utf8"), content.getData()).decode('latin-1')
content_object = ByteStringObject(content.encode('latin-1'))
page['/Contents'] = content_object
else:
continue
pdf_writer = PyPDF4.PdfWriter()
pdf_writer.add_page(pdf_reader.pages[0])
with open('output.pdf', 'wb') as output_file:
pdf_writer.write(output_file)
pdf_file.close()
workbook.close()
I am very to new to pdf reading and merging and working with it in python. Can you please spot the cause why i am getting this error and how can i solve it?