I want to extract some text from PDF files. To do this, I first tried to dump all of the data into a text file, but the result was a mess. I then decided to transfer the PDF data into MySQL instead, but I can't connect my code to the database. I don't have much knowledge about databases; I just created a schema (named `deneme`) and, inside that schema, a table named `cizim`. While trying to connect, I got the error message shown below the code. By the way, I am also trying to find certain data in the text file so that I can write that data to an Excel file.
from asyncore import write
import glob
from numpy import True_
from tabula import read_pdf
from tabulate import tabulate
import PyPDF2
import re
import os
import pandas as pd
import mysql.connector
# Module-level MySQL connection and cursor, opened when the script starts and
# used by the functions below through the global names `db` and `c`.
# NOTE: host and credentials are hard-coded here.
db = mysql.connector.connect(
    host="127.0.0.1",
    user="root",
    password="admin",
    database="deneme",
)
c = db.cursor()
def findPdf():
    """Process every ``*.pdf`` file found in ``../deneme``.

    Changes the process working directory to ``../deneme`` (resolved against
    the current working directory) and does not change it back. Each matching
    file name is printed and then passed to ``pdfToTxt``.

    Raises:
        OSError: if ``../deneme`` cannot be entered (raised by ``os.chdir``).
    """
    os.chdir("../deneme")
    # glob yields bare file names relative to the directory entered above.
    for pdf_name in glob.glob("*.pdf"):
        print(pdf_name)
        pdfToTxt(pdf_name)
def pdfToTxt(name):
    """Run both processing stages for a single PDF, in order.

    Args:
        name: Path of the PDF file to process.
    """
    # Stage 1: extract the PDF content (delegated to readPdf).
    readPdf(name)
    # Stage 2: search the text produced for this PDF (delegated to
    # findWordInTxt).
    findWordInTxt(name)
def readPdf(name):
    """Read all pages of a PDF with ``read_pdf`` and pass the result on.

    Args:
        name: Path of the PDF file to read.
    """
    # presumably a list of DataFrames, one per detected table -- confirm
    # against the tabula version in use.
    tables = read_pdf(name, pages="all", encoding="ISO-8859-1")
    createTxt(name, tables)
def createTxt(name, readedfile):
    """Append the tabulated data to ``<name>.txt`` and insert the PDF into MySQL.

    Args:
        name: Path of the source PDF. ``name + ".txt"`` is the text file that
            is appended to.
        readedfile: Table data to format with ``tabulate``.

    Side effects:
        Appends to ``<name>.txt``, executes one ``INSERT`` on the module-level
        cursor ``c`` and commits on the module-level connection ``db``.

    NOTE(review): the statement targets table ``pdfler``, column ``cizim``;
    confirm that this matches the actual database schema.
    """
    # Append mode: running the script again adds to the existing text file
    # instead of replacing it.
    with open(name + ".txt", "a", errors="ignore") as txt:
        txt.write(tabulate(readedfile))

    # The PDF is read as text. ISO-8859-1 maps every byte to a character, so
    # decoding cannot fail. The context manager closes the handle, which the
    # previous version left open.
    # NOTE(review): text mode also translates line endings, so the stored
    # value is not a byte-exact copy of the PDF.
    with open(name, encoding="ISO-8859-1") as acilacak_dosya:
        yuklenecek_dosya = acilacak_dosya.read()

    # Parameterized query: the driver handles quoting of the file content.
    c.execute("INSERT INTO pdfler (cizim) VALUES (%s)", (yuklenecek_dosya,))
    db.commit()
def writeOnExcel(txt, i, name):
    """Record a hit when the tag ``txt + str(i)`` occurs in ``<name>.txt``.

    Args:
        txt: Tag prefix to search for.
        i: Value appended (via ``str``) to the prefix.
        name: Path whose ``.txt`` companion file is searched.

    On a match, ``name`` is appended to the module-level ``list1`` and the
    tag to the module-level ``list2``. Nothing happens when there is no match.
    """
    tag = txt + str(i)
    with open(name + '.txt') as handle:
        contents = handle.read()

    if str(tag) not in contents:
        return

    list1.append(name)
    list2.append(tag)
def findWordInTxt(name):
    """Search ``<name>.txt`` for the tags ``b00`` .. ``b09`` and write the report.

    Args:
        name: Path whose ``.txt`` companion file is searched.

    Each tag is checked through ``writeOnExcel``, which appends matches to the
    module-level ``list1`` / ``list2``. Afterwards the complete contents of
    both lists are written to ``sample_datas.xlsx`` (sheet ``sheet1``, columns
    ``X`` and ``Y``), replacing the file written by any earlier call.

    Raises:
        FileNotFoundError: if ``<name>.txt`` does not exist (raised when
            ``writeOnExcel`` opens it).
    """
    txt = 'b0'
    # writeOnExcel opens the text file itself, so no file handle is needed
    # here.
    for i in range(10):
        writeOnExcel(txt, i, name)

    data = pd.DataFrame({"X": list1, "Y": list2})
    # The `encoding` keyword is not passed: pandas deprecated it in 1.5 and
    # removed it in 2.0, where passing it raises TypeError.
    data.to_excel('sample_datas.xlsx', sheet_name='sheet1', index=False)
# Module-level accumulators filled by writeOnExcel and exported by
# findWordInTxt. The initial entry of each list becomes the first data row of
# the sheet, below the "X" / "Y" column headers.
list1 = ["FILE_NAME"]
list2 = ["REV"]

# Script entry point: process every PDF in the target directory.
findPdf()
c.execute("INSERT INTO cizim (deneme) VALUES (%s) " (yuklenecek_dosya,))
TypeError: 'str' object is not callable