I'm rather new to this. I need to take all the files (.csv and .xlsx, e.g. Supermarket.xlsx, sales.csv, marketing.xlsx) that are uploaded into a directory and insert each of them as BLOB data into a DB2 table named SB_DATA_BLOB_TEST, which has the columns "data_column", "ingestion_date_time", "ingestion_file_name" and "row_id".
So far I have only managed to insert a single file, supplying its timestamp, filename and row_id by hand. How can I apply the same function to every file uploaded to that directory, so that the timestamp, filename and row_id are filled in for each file automatically, without my having to enter the row_id manually?
The code:
import os
import pandas as pd
from subprocess import Popen, PIPE, run
import jaydebeapi
from project_lib import Project
# Directory the uploaded data assets are read from.
constants = dict(INPUT_DIR='/project_data/data_asset/')

# Logical name -> file name inside INPUT_DIR.
file_names = dict(Supermart='Supermart.xlsx')

schema_name = 'ABC.'
table_prefix = 'SB_'

# Current time in the Asia/Singapore zone, formatted like "20240131 235959".
timestamp = pd.Timestamp.now("Asia/Singapore").strftime("%Y%m%d %H%M%S")

# Both names hold the same full path: ``file`` is the data to read and
# ``filename`` is the value recorded as ingestion_file_name.
file = filename = f"{constants['INPUT_DIR']}{file_names['Supermart']}"
def convertToBinaryData(filename):
    """Read the file at *filename* in binary mode and return its bytes."""
    with open(filename, mode='rb') as handle:
        return handle.read()
def insertBLOB(data, ingestion_datetime, ingestion_filename, row_id):
    """Insert one file into ABC.SB_DATA_BLOB_TEST as a BLOB row.

    Args:
        data: Path of the file whose bytes are stored in ``data_column``.
        ingestion_datetime: Value bound to ``ingestion_date_time``.
        ingestion_filename: Value bound to ``ingestion_file_name``.
        row_id: Value bound to ``row_id``.

    Returns:
        None. Any failure is reported on stdout instead of being raised.
    """
    print("Inserting BLOB into ABC SB_Data_Blob table")
    # Pre-bind both handles so the cleanup in ``finally`` is safe even when
    # the failure happens before the connection or cursor exists.
    abc_sb_connection = None
    curs = None
    try:
        project = Project.access()
        abc_sb_credentials = project.get_connection(name="abc_sb")
        # The credentials are deliberately not printed: they include the
        # database password.
        jdbc_url = '{}://{}:{}/{}:user={};password={};'.format(
            'jdbc:db2',
            abc_sb_credentials['host'],
            abc_sb_credentials['port'],
            abc_sb_credentials['database'],
            abc_sb_credentials['username'],
            abc_sb_credentials['password'],
        )
        abc_sb_connection = jaydebeapi.connect('com.ibm.db2.jcc.DB2Driver', jdbc_url)
        curs = abc_sb_connection.cursor()
        sql_insert_blob_query = """ INSERT INTO ABC.SB_DATA_BLOB_TEST
(data_column, ingestion_date_time, ingestion_file_name, row_id) VALUES (?,?,?,?)"""
        file = convertToBinaryData(data)
        # Parameters are bound positionally to the four ``?`` markers.
        insert_blob_tuple = (jaydebeapi.Binary(file), ingestion_datetime, ingestion_filename, row_id)
        result = curs.execute(sql_insert_blob_query, insert_blob_tuple)
        abc_sb_connection.commit()
        print("File is inserted successfully as a BLOB into SB_DATA_BLOB table", result)
    except Exception as error:
        # Best-effort by design: report the problem and carry on.
        print(f"{error}")
        print("Failed inserting BLOB data into DB2 table SB_DATA_BLOB")
    finally:
        # Close only what was actually opened, and make sure a failing
        # cursor close cannot prevent the connection from being closed.
        try:
            if curs is not None:
                curs.close()
        finally:
            if abc_sb_connection is not None:
                abc_sb_connection.close()
                print("DB2 connection is closed")
# Ingest the single configured file; the row id is supplied by hand.
insertBLOB(
    data=file,
    ingestion_datetime=timestamp,
    ingestion_filename=filename,
    row_id='2',
)