Trouble merging pdf files with PyPDF2 module for Python

Question

I have built a dictionary from a table that gives me a list of pdf filepaths for each key. Where there are multiple values for a key, I would like to merge the pdf files together and use the key in the output file name. I am getting an attribute error when I try to write out the merged_file:

'unicode' object has no attribute 'write'.

I am basing my code off of this post. Can someone see what might be wrong?

import arcpy, os, PyPDF2, shutil
arcpy.env.overwriteOutput = True

# Input locations. Only gb_gdb_tbl is read below; gb_xls and gb_tbl_sort are
# defined but not used in this snippet.
gb_xls = r'P:\Records\GIS\__Databases__\MapIndex\_MapSets_Grantors_Verification_Q.xlsx'
gb_gdb_tbl = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_tbl_sort = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
# Fields requested from the cursor; below, row[0] is used as the key and
# row[2] as the value (the PDF path).
gb_fields = ['Actual_SheetLabel','GBSheetLabel','Image_Path_Filename']
gb_dict = {}  # key -> a single path, or several paths joined into one string
v_list = []  # not used in this snippet

# Key and accumulated value of the previous row, used to detect repeated keys.
lastkey = -1
lastvalue = ""

# The rows are sorted so that rows sharing a key are adjacent; the
# `k == lastkey` comparison below relies on that.
rows = sorted(arcpy.da.SearchCursor(gb_gdb_tbl,gb_fields))
for row in rows:
    k = row[0]
    v = row[2]
    if k not in gb_dict:
        gb_dict[k] = v
    if k == lastkey:
            # Same key as the previous row: extend the running string.
            # The separator is ', ' (comma + space), so splitting on ','
            # later leaves a leading space on every path after the first.
            v = str(lastvalue) + ', ' + str(v)
            gb_dict[k] = v
    lastkey = k
    lastvalue = v

# One merger object is created here, before the loop, and is never replaced
# inside it, so whatever is appended for one key is still held when a later
# key is written.
merged_file = PyPDF2.PdfFileMerger()

for k,v in gb_dict.items():
    new_file = os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf')
    if len(str(v).split(',')) > 1:
        # [v] is a one-element list, so this loop body runs exactly once and
        # only the first path of the joined string is appended.
        for i in [v]:
            val =  i.split(',')[0]
            # NOTE(review): val is a path string and 'rb' is passed as the
            # second positional argument of PdfFileReader - confirm against
            # the PyPDF2 signature that this is what was intended.
            merged_file.append(PyPDF2.PdfFileReader(val, 'rb'))
        # new_file is a path string, not an open file object.
        # NOTE(review): presumably this call is where the reported
        # "'unicode' object has no attribute 'write'" is raised - confirm.
        merged_file.write(new_file)
    else:
        # Only one path for this key: copy it to the output name as-is.
        shutil.copyfile(v,new_file)

UPDATE:

I have some different code for merging PDF files using PyPDF2 that will merge files without error. Now my problem is that it's merging many more files together than I am expecting. I would like to loop through my dictionary, look for items that have more than one value (pdf file) per key, and merge those values together in one file named by the key. There must be something wrong with my looping or indentation, but I can't see what it is. Here is the updated code:

import arcpy, os, PyPDF2, shutil
arcpy.env.overwriteOutput = True

# Input locations. Only gb_gdb_tbl is read below; gb_xls and gb_tbl_sort are
# defined but not used in this snippet.
gb_xls = r'P:\Records\GIS\__Databases__\MapIndex\_MapSets_Grantors_Verification_Q.xlsx'
gb_gdb_tbl = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_tbl_sort = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
# Fields requested from the cursor; below, row[0] is used as the key and
# row[2] as the value (the PDF path).
gb_fields = ['Actual_SheetLabel','GBSheetLabel','Image_Path_Filename']
gb_dict = {}  # key -> a single path, or several paths joined with ','

# Key and accumulated value of the previous row, used to detect repeated keys.
lastkey = -1
lastvalue = ""

# The rows are sorted so that rows sharing a key are adjacent; the
# `k == lastkey` comparison below relies on that.
rows = sorted(arcpy.da.SearchCursor(gb_gdb_tbl,gb_fields))
for row in rows:
    k = row[0]
    v = row[2]
    if k not in gb_dict:
        gb_dict[k] = v
    if k == lastkey:
        # Same key as the previous row: append this path to the running
        # comma-separated string and store it back under the key.
        v = str(lastvalue) + ',' + str(v)
        gb_dict[k] = v
    lastkey = k
    lastvalue = v

# One merger object is created here, before the loop, and is never replaced
# inside it, so files appended for earlier keys are still held when a later
# key's output is written.
# NOTE(review): presumably this is why the outputs contain more files than
# expected - confirm by creating the merger inside the loop.
merger = PyPDF2.PdfFileMerger()

for k,v in gb_dict.items():
    # Recover the individual paths from the comma-joined string.
    v_list = v.split(',')
    if len(v_list) > 1:
        for i in v_list:
            print k,',',i
            # `input` shadows the builtin of the same name; the file handle
            # is not closed anywhere in this snippet.
            input = open(i,'rb')
            merger.append(input)
        # The output handle is likewise not closed in this snippet.
        output = open(os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf'), "wb")
        merger.write(output)
        print output
    else:
        # Only one path for this key: copy it to the output name as-is.
        new_file = os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf')
        shutil.copyfile(str(v),new_file)

score 0 · Accepted Answer · answered Oct 24 '14 at 21:53

I scrapped PyPDF2 and just used the arcpy.mapping module, which includes some PDF document functions. Working code below:

import arcpy, os, shutil
arcpy.env.overwriteOutput = True

# Input locations. Only gb_gdb_tbl is read by this script; gb_xls and
# gb_tbl_sort are kept for reference.
gb_xls = r'P:\Records\GIS\__Databases__\MapIndex\_MapSets_Grantors_Verification_Q.xlsx'
gb_gdb_tbl = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_tbl_sort = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_fields = ['Actual_SheetLabel', 'GBSheetLabel', 'Image_Path_Filename']
out_dir = r'D:\GrantorBoxes_Merged_Pdfs'

# Group the PDF paths by sheet label (row[0]) as real lists. Keeping lists,
# rather than joining the paths into one comma-separated string and splitting
# it again later, means a path that itself contains a comma is not broken
# apart, and no str() conversion of the cursor values is needed.
# The rows are sorted first so the paths within each group keep the same
# order as the sorted table rows.
gb_dict = {}
for row in sorted(arcpy.da.SearchCursor(gb_gdb_tbl, gb_fields)):
    gb_dict.setdefault(row[0], []).append(row[2])

for k, val_list in gb_dict.items():
    pdf_path = os.path.join(out_dir, k + '.pdf')
    if len(val_list) > 1:
        # Several PDFs share this key: build a fresh output document for
        # this key only, append each source PDF in order, then save it.
        out_pdf_file = arcpy.mapping.PDFDocumentCreate(pdf_path)
        for v in val_list:
            print('%s %s' % (k, v))
            out_pdf_file.appendPages(v)
        print(out_pdf_file)
        out_pdf_file.saveAndClose()
    else:
        # A single PDF for this key: no merge needed, just copy it to the
        # output name. No PDFDocument object is created in this case.
        shutil.copyfile(val_list[0], pdf_path)

Trouble merging pdf files with PyPDF2 module for Python

1 Answer