I am quite new to Python and also very new to using postgresql, so please forgive me if this is something basic (which I - so far - have failed to achieve). I'm trying to write a python code that:
- creates a new database (
testdb
) - reads a csv file into a pandas dataframe
- creates and populates a new table within the database out of the pandas dataframe.
I have 3 different files so far: a) an .ini
-File where I store the database info I need to create a new database, b) a .csv
-File (from here, named 100_recs.csv
) and c) my python code.
database.ini:
[postgresql]
host=localhost
user=postgres
password=creator
port=5432
db_creator.py:
from config import config
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import pandas as pd
# useful info for psycopg2:
# https://stackoverflow.com/questions/34484066/create-a-postgres-database-using-python
class MyDB(object):
def __init__(self):
self.params = config()
def create_new_db(self, newdb):
user, host, port = self.params['user'], self.params['host'], testdb.params['port']
pw = self.params['password']
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(user, pw, host, port, newdb)
engine = create_engine(url)
if not database_exists(engine.url):
create_database(engine.url)
print(database_exists(engine.url))
if __name__ == '__main__':
testdb = MyDB()
testdb.create_new_db('testdb')
When I try to do it like that, I'm getting the following Error:
sqlalchemy.exc.OperationalError: (psycopg2.OperationalError)
However, it works when I do it as suggested in this SO post. Unfortunately, the answer in this post uses psycopg2
to create a new database, but I would like to do it with sqlalchemy
(also because I think that working with Pandas dataframe further on will be easier with sqlalchemy
(as e.g. shown here. Or am I wrong?). I thought that when doing it with sqlqlchemy
then something like the following should be possible to read the data from the csv-file into a pandas dataframe and then populate a table in the new database:
def connect_alchemy(user, host, port, db, password):
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(user, password, host, port, db)
con = sqlalchemy.create_engine(url, client_encoding='utf8')
mydata = pd.read_csv('100_recs.csv', delimiter=';', quotechar='"')
data_db = mydata.to_sql(name='100_records', con=con, if_exists='replace', index=True, chunksize=10)
print(con.execute('SELECT * from 100_records'))
But honestly, I'm stuck here and need some help... It would be great if someone could point me into the right direction.
EDIT:
Ah stupid me!
So I had an old typo in the follwing lines in db_creator.py
user, host, port = testdb.params['user'], testdb.params['host'], testdb.params['port']
pw = testdb.params['password']
should be:
user, host, port = self.params['user'], self.params['host'], self.params['port']
pw = self.params['password']
I already changed this.
Then I also forgot to add the config.py
file here. Apologies for that.
Here you go:
config.py
# source: http://www.postgresqltutorial.com/postgresql-python/connect/
from configparser import ConfigParser
def config(filename='database.ini', section='postgresql'):
# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)
# get section, default to postgresql
db = {}
if parser.has_section(section):
params = parser.items(section)
for param in params:
db[param[0]] = param[1]
else:
raise Exception('Section {0} not found in the {1} file'.format(section, filename))
return db
EDIT 2:
It works now with the following setup:
database.ini:
[postgresql]
host=localhost
user=postgres
password=postgres
port=5432
config.py:
# source: http://www.postgresqltutorial.com/postgresql-python/connect/
from configparser import ConfigParser
def config(filename='database.ini', section='postgresql'):
# create a parser
parser = ConfigParser()
# read config file
parser.read(filename)
# get section, default to postgresql
db = {}
if parser.has_section(section):
params = parser.items(section)
for param in params:
db[param[0]] = param[1]
else:
raise Exception('Section {0} not found in the {1} file'.format(section, filename))
return db
the csv file: from here
db_creator.py
from config import config
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy_utils import database_exists, create_database
import pandas as pd
# useful info for psycopg2:
# https://stackoverflow.com/questions/34484066/create-a-postgres-database-using-python
class MyDB(object):
def __init__(self):
self.params = config()
def create_new_db(self, newdb):
user, host, port = self.params['user'], self.params['host'], self.params['port']
pw = self.params['password']
url = 'postgresql://{}:{}@{}:{}/{}'
url = url.format(user, pw, host, port, newdb)
self.engine = create_engine(url, client_encoding='utf8')
if not database_exists(self.engine.url):
create_database(self.engine.url)
# print(database_exists(engine.url))
def df2postgres(engine, df):
con = engine.connect()
df.to_sql(name='records', con=con, if_exists='replace', index=True, chunksize=10)
return con
if __name__ == '__main__':
testdb = MyDB()
testdb.create_new_db('testdb')
engn = testdb.engine
df = pd.read_csv('100_recs.csv', delimiter=';', quotechar='"', encoding='utf-8')
con = df2postgres(engine=engn, df=df)
dta = con.execute('SELECT * FROM records LIMIT 5;')
print(dta.fetchall())
Apologies for the stupid mistakes...