Can anyone help me out?
Thanks in advance.
Code:
from PyPDF2 import PdfFileReader
def text_extractor(path, page_number=2):
    """Print and return the extracted text of one page of a PDF file.

    The page object itself is printed first (its dictionary of PDF
    attributes), followed by the UTF-8 encoded text extracted from it.

    Args:
        path: Filesystem path of the PDF file to read.
        page_number: Index handed unchanged to ``PdfFileReader.getPage``
            (zero-based according to the PyPDF2 documentation). Defaults
            to 2, the index this function previously hard-coded.

    Returns:
        The extracted text of the page, encoded as UTF-8 ``bytes`` --
        the same value that is printed.
    """
    # The reader pulls data from the file handle on demand, so the page
    # lookup and the text extraction both stay inside the ``with`` block.
    with open(path, 'rb') as f:
        pdf = PdfFileReader(f)
        page = pdf.getPage(page_number)
        print(page)
        # NOTE(review): the sample output for this script looks garbled;
        # presumably the page's fonts lack a usable character mapping, which
        # text extraction cannot recover from -- confirm against the PDF.
        text = page.extractText().encode('utf-8')
        print(text)
    return text
if __name__ == '__main__':
    # Script entry point: run the extractor against the sample PDF.
    path = '/home/ubuntu/Desktop/hi.pdf'
    text_extractor(path)
Output:
{'/Parent': IndirectObject(137, 0), '/CropBox': [0, 0, 960, 540], '/Rotate': 0, '/Resources': {'/ColorSpace': {'/CS0': IndirectObject(155, 0)}, '/XObject': {'/Im0': IndirectObject(6, 0), '/Im1': IndirectObject(8, 0)}, '/Font': {'/TT1': IndirectObject(132, 0), '/TT0': IndirectObject(157, 0), '/TT2': IndirectObject(159, 0)}, '/ProcSet': ['/PDF', '/Text', '/ImageC']}, '/Contents': IndirectObject(5, 0), '/MediaBox': [0, 0, 960, 540], '/Type': '/Page'}
b'65#-\'\n!C,%03D\n!9$*0&30%30\n!E$34&,%&$AA(#6$/#,%\n!F0?860?&3$-A(#%:\n!G+$/&2$"#$H(0I($40"&@#((&4,8&830\n!G+$/&4,8&(#-#/#%:&2$"#$H(0\n!J,@&/,&+$%?(0K&E20"4/+#%:&0(30\n'