I'm having some trouble writing a script in Python 2.7 on Windows. In part of the script, I need to compose a windows file path from a directory and filename with extension. It works fine when I write it in as a string, but I get an error when I try to do it as a concatenate. I think it might have something to do with spaces in the paths.
Here is a code section that works
filepath = os.path.normpath("C:/Users/jpettit/documents/projects/vendor files script/test files/122484.pdf")
print find_filename(filepath)
And here is the code section that doesn't work
directory_path = os.path.normpath("C:/Users/jpettit/documents/projects/vendor files script/test files")
file = "122484.pdf"
filepath = os.path.join(directory_path, file)
print find_filename(filepath)
I'm having a really hard time seeing what the difference between these two would be. Here's the code in context of the entire script.
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from cStringIO import StringIO
import re
import os
def convert_pdf_to_txt(path):
rsrcmgr = PDFResourceManager()
retstr = StringIO()
codec = 'utf-8'
laparams = LAParams()
device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
fp = file(path, 'rb')
interpreter = PDFPageInterpreter(rsrcmgr, device)
password = ""
maxpages = 0
caching = True
pagenos=set()
for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password,caching=caching, check_extractable=True):
interpreter.process_page(page)
fp.close()
device.close()
str = retstr.getvalue()
retstr.close()
return str
def find_filename(filepath):
try:
filenumberlocation = re.search('\d\d\d\d\d\d\.pdf',filepath, re.IGNORECASE)
filenumber = filenumberlocation.group()[:6]
print filepath
pdfconverted = convert_pdf_to_txt(filepath)
revlocation = re.search('REV #\n....',pdfconverted)
rev = revlocation.group()[-4:]
new_filename = filenumber + ' ' + rev + '.pdf'
return new_filename
except AttributeError:
return os.path.basename(filepath)
def list_files(directory_path):
filenames_list = []
for dirpath, dirnames, filenames in os.walk(directory_path):
filenames_list.extend(filenames)
return filenames_list
directory_path = os.path.normpath("C:/Users/jpettit/documents/projects/vendor files script/test files")
file_list = list_files(directory_path)
for file in file_list:
filepath = os.path.join(directory_path, file)
os.rename(filepath, os.path.join(directory_path, find_filename(file)))
The error that I get says the following
Traceback (most recent call last):
File "revfind.txt", line 59, in <module>
os.rename(filepath, os.path.join(directory_path, find_filename(file)))
File "revfind.txt", line 34, in find_filename
pdfconverted = convert_pdf_to_txt(filepath)
File "revfind.txt", line 16, in convert_pdf_to_txt
fp = file(path, 'rb')
TypeError: 'str' object is not callable
As you can probably tell, I'm very new at this, and would really appreciate any guidance!