0

I am trying getting my error here

from PyPDF4.generic import ByteStringObject
pdf_reader = PyPDF4.PdfFileReader(pdf_file)
page = pdf_reader.pages[0]
page.mergePage(pdf_reader.pages[0])
content = page['/Contents'].getObject()
content = re.sub(b"/Tx BMC", "/Tx BMC BT /F1 12 Tf 1 0 0 1 50 50 Tm ({}) Tj ET".format(label).encode("utf8"), content.getData()).decode('latin-1')
content_object = ByteStringObject(content.encode('latin-1'))
page['/Contents'] = content_object

the error i am getting is

Traceback (most recent call last):
  File "D:\projects\work\pdfProject\main.py", line 48, in <module>
    page['/Contents'] = content_object
    ~~~~^^^^^^^^^^^^^
  File "C:\Users\Ammar\AppData\Local\Programs\Python\Python311\Lib\site-packages\PyPDF4\generic.py", line 505, in __setitem__
    raise ValueError("key must be PdfObject")
ValueError: key must be PdfObject

here is methodology which i tried Methodology

  1. OPEN PDF FOREGROUND. FOR EXAMPLE "PLAN 02 ELEMENTS.pdf"
  2. TAKE COORDINATES OF THE LIMITS OF THE PLANE
  3. OPEN EXCEL FILE "EXPORT"
  4. SCROLL THE ROWS OF THE EXCEL "EXPORT" FILE AND IF COLUMN B HAS NO DATA GO TO THE NEXT ROW IF COLUMN B HAS ANY DATA CHECK IF THE COORDINATES OF COLUMN "D" AND "E" ARE IN THE PLANE PDF.
  5. CHECK IF THE COORDINATES OF THE ROW ARE WITHIN THE LIMITS OF THE PDF PLANE. IF THEY ARE NOT ON THE PLANE, GO TO THE NEXT ROW. IF THEY ARE ON THE MAP, PUT THE LABEL OF COLUMN "J" IN THE PDF MAP.
  6. REPEAT STEPS 4, 5 UNTIL YOU REACH THE LAST ROW OF THE EXCEL.
  7. OPEN NEXT PLANE AND REPEAT STEPS 2, 3, 4, 5, 6.

NOTE: THE COORDINATES EXPRESSED IN COLUMNS "D" AND "E" ARE EXPRESSED IN UTM COORDINATES. FOR WHICH THE TIME ZONE OF SAID COORDINATES MUST BE KNOWN. THE TIME ZONE OF THE SAMPLE "EXPORT" FILE IS 30.

import openpyxl
import PyPDF4
from pyproj import Transformer
import pyproj
from PyPDF4.generic import ByteStringObject
from PyPDF4.pdf import ContentStream
from PyPDF4.generic import NameObject, NumberObject, ByteStringObject

import re

utm_zone = '30N'
pdf_file_name = 'geo.pdf'
pdf_file = open(pdf_file_name, 'rb')
pdf_reader = PyPDF4.PdfFileReader(pdf_file)
page = pdf_reader.pages[0]
llx, lly = page.trimBox.lowerLeft
urx, ury = page.trimBox.upperRight
# print(llx,lly,urx,ury)
utm_zone = '30N'
crs_from = pyproj.CRS('EPSG:32630')
transformer = Transformer.from_crs(crs_from, 'EPSG:4326')
excel_file_name = 'EXPORT.xlsx'
workbook = openpyxl.load_workbook(excel_file_name)
worksheet = workbook.active
for row in worksheet.iter_rows(min_row=2):
    # print(row[1].value)
    if row[1].value is None:
        continue
    easting, northing = float(row[3].value.replace(',', '.')), float(row[4].value.replace(',', '.'))
    transformer_lat_lon = Transformer.from_crs("EPSG:32630", "EPSG:4326")
    lon, lat = transformer_lat_lon.transform(easting, northing)
    # print(easting,northing)
    # print(lon,lat)
    # print(llx,easting,urx)
    if llx <= easting  and lly <= northing:
        x, y = transformer.transform(easting, northing)
        print(x,y)
        # print(x,y)
        # print("arham")
        print(page.mediaBox.upperRight[1])
        if 0 <= x <= page.mediaBox.upperRight[0]:
            label = row[9].value
            # print(label)
            page.mergePage(pdf_reader.pages[0])
            content = page['/Contents'].getObject()
            content = re.sub(b"/Tx BMC", "/Tx BMC BT /F1 12 Tf 1 0 0 1 50 50 Tm ({}) Tj ET".format(label).encode("utf8"), content.getData()).decode('latin-1')
            content_object = ByteStringObject(content.encode('latin-1'))
            page['/Contents'] = content_object
    else:
        continue
pdf_writer = PyPDF4.PdfWriter()
pdf_writer.add_page(pdf_reader.pages[0])
with open('output.pdf', 'wb') as output_file:
    pdf_writer.write(output_file)
pdf_file.close()
workbook.close()

I am very to new to pdf reading and merging and working with it in python. Can you please spot the cause why i am getting this error and how can i solve it?

timp bill
  • 57
  • 7

0 Answers0