I recreated the PDF reader program that is made in this video:
I tested it with some random PDF files I have on my PC, but the program extracts the text from only a few of them. Why is that? Is it a flaw in the program, am I missing something, or are there certain kinds of PDF files that cannot be read by default?
Here is the full code:
import tkinter as tk
import PyPDF2
from PIL import ImageTk,Image
from tkinter.filedialog import askopenfile
# --- Main window -------------------------------------------------------------
root = tk.Tk()
# Offsets only (no size): put the window's top-left corner at x=975, y=150.
root.geometry("+975+150")

# A 600x300 canvas gridded across three columns and three rows; the logo,
# instructions and browse button are placed on those same grid cells.
canvas = tk.Canvas(root, width=600, height=300)
canvas.grid(columnspan=3, rowspan=3)

# --- Logo --------------------------------------------------------------------
# The path is relative, so it is resolved against the current working directory.
logo = Image.open("tkinterResources/logo.png")
logo = ImageTk.PhotoImage(logo)
logo_label = tk.Label(root, image=logo)
# Keep a reference to the PhotoImage on the widget that displays it.
logo_label.image = logo
logo_label.grid(row=0, column=1)

# --- Instructions ------------------------------------------------------------
instructions = tk.Label(
    root,
    text="Select a PDF file on your computer to extract all its text",
    font="Raleway",
)
instructions.grid(row=1, column=0, columnspan=3)
def open_file():
    """Let the user pick a PDF and display its extracted text in a text box.

    Used as the browse button's ``command`` callback. While the file dialog
    is open and the PDF is being read, the button caption (``browse_text``)
    shows ``"loading..."``; it is always restored to ``"Browse"`` afterwards,
    including when the dialog is cancelled or PyPDF2 raises while reading.

    The text of every page is extracted and joined with newlines, then shown
    centred in a new ``tk.Text`` widget gridded at row 3, column 1 of ``root``.

    NOTE(review): ``extractText()`` only returns text PyPDF2 manages to
    decode from the page content; per the PyPDF2 documentation its quality
    depends on the tool that generated the PDF, so some files may yield
    little or no text. Confirm against the installed PyPDF2 version.
    """
    browse_text.set("loading...")
    try:
        file = askopenfile(
            parent=root,
            mode="rb",
            title="Choose a file",
            filetypes=[("Pdf file", "*.pdf")],
        )
        if not file:
            # Dialog was cancelled: nothing to read.
            return

        # Close the file handle as soon as extraction is finished.
        with file:
            read_pdf = PyPDF2.PdfFileReader(file)
            page_content = "\n".join(
                read_pdf.getPage(page_number).extractText()
                for page_number in range(read_pdf.getNumPages())
            )

        text_box = tk.Text(root, height=10, width=50, padx=15, pady=15)
        text_box.insert("1.0", page_content)
        # Centre every line, from the first character to the end of the text.
        text_box.tag_configure("center", justify="center")
        text_box.tag_add("center", "1.0", "end")
        text_box.grid(row=3, column=1)
    finally:
        # Restore the caption on every exit path so the button never stays
        # stuck on "loading...".
        browse_text.set("Browse")
# --- Browse button -----------------------------------------------------------
# The caption is held in a StringVar so that open_file can change it while it
# is working and put it back afterwards.
browse_text = tk.StringVar(value="Browse")
browse_btn = tk.Button(
    root,
    command=open_file,
    textvariable=browse_text,
    font="Raleway",
    bg="#20bebe",
    fg="white",
    width=15,
    height=2,
)
browse_btn.grid(column=1, row=2)

# --- Lower canvas ------------------------------------------------------------
# A second, 600x250 canvas spanning three columns; gridded without an explicit
# row. This rebinds the module-level name `canvas`.
canvas = tk.Canvas(root, height=250, width=600)
canvas.grid(columnspan=3)

# Enter the Tk event loop.
root.mainloop()
I also created my own pdf file with just one line of text using OpenOffice and even that doesn't seem to work.