I have built a dictionary from a table that gives me a list of pdf filepaths for each key. Where there are multiple values for a key, I would like to merge the pdf files together and use the key in the output file name. I am getting an attribute error when I try to write out the merged_file:
'unicode' object has no attribute 'write'.
I am basing my code off of this post. Can someone see what might be wrong?
import arcpy, os, PyPDF2, shutil
arcpy.env.overwriteOutput = True
gb_xls = r'P:\Records\GIS\__Databases__\MapIndex\_MapSets_Grantors_Verification_Q.xlsx'
gb_gdb_tbl = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_tbl_sort = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_fields = ['Actual_SheetLabel','GBSheetLabel','Image_Path_Filename']
gb_dict = {}
v_list = []
lastkey = -1
lastvalue = ""
rows = sorted(arcpy.da.SearchCursor(gb_gdb_tbl,gb_fields))
for row in rows:
k = row[0]
v = row[2]
if k not in gb_dict:
gb_dict[k] = v
if k == lastkey:
v = str(lastvalue) + ', ' + str(v)
gb_dict[k] = v
lastkey = k
lastvalue = v
merged_file = PyPDF2.PdfFileMerger()
for k,v in gb_dict.items():
new_file = os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf')
if len(str(v).split(',')) > 1:
for i in [v]:
val = i.split(',')[0]
merged_file.append(PyPDF2.PdfFileReader(val, 'rb'))
merged_file.write(new_file)
else:
shutil.copyfile(v,new_file)
UPDATE:
I have some different code for merging PDFs files using PyPDF2 that will merge files without error. Now my problem is that its merging many more files together than I am expecting. I would like to loop through my dictionary, look for items that have more than one value (pdf file) per key, and merge those values together in one file named by the key. There must be something wrong with my looping or indentation but I can't see what it is. here is the updated code:
import arcpy, os, PyPDF2, shutil
arcpy.env.overwriteOutput = True
gb_xls = r'P:\Records\GIS\__Databases__\MapIndex\_MapSets_Grantors_Verification_Q.xlsx'
gb_gdb_tbl = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_tbl_sort = r'C:\temp\temp.gdb\_MapSets_Grantors_Verification_Q'
gb_fields = ['Actual_SheetLabel','GBSheetLabel','Image_Path_Filename']
gb_dict = {}
lastkey = -1
lastvalue = ""
rows = sorted(arcpy.da.SearchCursor(gb_gdb_tbl,gb_fields))
for row in rows:
k = row[0]
v = row[2]
if k not in gb_dict:
gb_dict[k] = v
if k == lastkey:
v = str(lastvalue) + ',' + str(v)
gb_dict[k] = v
lastkey = k
lastvalue = v
merger = PyPDF2.PdfFileMerger()
for k,v in gb_dict.items():
v_list = v.split(',')
if len(v_list) > 1:
for i in v_list:
print k,',',i
input = open(i,'rb')
merger.append(input)
output = open(os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf'), "wb")
merger.write(output)
print output
else:
new_file = os.path.join(r'D:\GrantorBoxes_Merged_Pdfs',k+'.pdf')
shutil.copyfile(str(v),new_file)