I am currently building a docx/pdf converter in Python, and have decided to split up the files into 2 documents:
main.py
- controls the program's flow;
convert_to_text.py
- contains the function that converts pdf/docx files to txt.
At the moment, I am having difficulties passing the global variable cv
across both files and importing all of the functions in convert_to_text.py
to use in main.py
. This is the error I get:
C:\Python27\python.exe D:/cv-parser/main.py
Traceback (most recent call last):
File "D:/cv-parser/main.py", line 1, in <module>
from convert_to_text import *
File "D:\cv-parser\convert_to_text.py", line 1, in <module>
from main import cv
File "D:\cv-parser\main.py", line 5, in <module>
document_to_text("resources\CV.pdf")
NameError: name 'document_to_text' is not defined
Process finished with exit code 1
How do I fix it?
This is my code so far:
In convert_to_text.py
from main import cv
import docx
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from cStringIO import StringIO
def document_to_text(filename):
if filename[-5:] == ".docx":
doc = docx.Document(filename)
full_text = []
for para in doc.paragraphs:
full_text.append(para.text)
global cv
cv = '\n'.join(full_text)
return cv
elif filename[-4:] == ".pdf":
return pdf_to_txt(filename)
def pdf_to_txt(file_path):
rsrcmgr = PDFResourceManager()
retstr = StringIO()
codec = 'utf-8'
laparams = LAParams()
device = TextConverter(rsrcmgr, retstr, codec=codec, laparams=laparams)
fp = file(file_path, 'rb')
interpreter = PDFPageInterpreter(rsrcmgr, device)
password = ""
maxpages = 0
caching = True
pagenos = set()
for page in PDFPage.get_pages(fp, pagenos, maxpages=maxpages, password=password, caching=caching,
check_extractable=True):
interpreter.process_page(page)
fp.close()
device.close()
str = retstr.getvalue()
retstr.close()
global cv
cv = str
return cv
In main.py
from convert_to_text import *
cv = 0
document_to_text("resources\CV.pdf")
print cv