I'm writing a Flask API to extract text from an uploaded document. I want to check the file extension: if it is `.pdf`, I'll pass the file to pdfminer; otherwise I'll pass it to docx2txt.
@app.route('/text-extraction', methods=['POST'])
def text_extractions():
    """Extract plain text from the document uploaded in the ``files`` field.

    The upload's filename extension selects the extractor: ``.pdf``
    (compared case-insensitively) goes to ``extract_text``, anything else
    goes to ``docx2txt.process``.

    Returns:
        The extracted text. For non-PDF uploads, tab characters are replaced
        with single spaces, and an empty string is returned when nothing was
        extracted.
    """
    f = request.files['files']
    # splitext() needs the uploaded file's *name*; passing the upload object
    # itself raises TypeError. The name may be missing, so fall back to ''.
    file_extension = os.path.splitext(f.filename or '')[1].lower()
    if file_extension == '.pdf':
        # NOTE(review): extract_text is presumably pdfminer's high-level
        # helper, which accepts a file-like object -- confirm against imports.
        return extract_text(f)

    text = docx2txt.process(f)
    # Check the extracted text, not the extract_text function object (which
    # is always truthy).
    if text:
        return text.replace('\t', ' ')
    # A view must not return None (Flask rejects it with a 500); respond
    # with an empty body instead.
    return ''