My goal is to combine three CSV files into a DataFrame. The files are:
'Gender':
'Jobs':
'Names'
The 'Gender' file shares the 'First name' column with the 'Names' file. The 'Jobs' file shares the 'Last name' column with the 'Names' file but it doesn't share any with the 'Gender' file.
I wrote the code below, which creates a simple GUI.
import csv
import pandas as pd
from tkinter import *
from tkinter.filedialog import askopenfilename
from tkinter import filedialog
import os
class GUI:
#create an initialisation method
def __init__(self, master):
self.master = root
#create an empty list for the three files
file_list = []
#allow the user to select files
def select_files(self):
self.file_path = filedialog.askopenfilename()
return self.file_path
#add the files to the file_list
def add_files_to_list(self):
select_files(self)
self.file_name = os.path.basename(self.file_path)
file_list.append(self.file_name)
return file_list
#read the CSV files using pandas and combine them
def convert_files(self):
try:
filez = add_files_to_list(self)
df1 = pd.read_csv(filez[0], error_bad_lines=False, dtype='unicode')
df2 = pd.read_csv(filez[1], error_bad_lines=False, dtype='unicode')
df3 = pd.read_csv(filez[2], error_bad_lines=False, dtype='unicode')
self.result = df1.merge(df2.merge(df3))
df_label = Label(master, text=(self.result), font=15, highlightthickness=0, bd=5, justify='center')
df_label.pack(fill='both', pady=10, padx=10)
#ignore the error shown when self.result tries to merge the DataFrame but cannot because not all 3 files were
#added
except IndexError:
pass
#create a button to allow the user to select files
convert_files_button = Button(self.master, text='Convert files...', font=(15), highlightthickness=4, fg='black')
convert_files_button.pack(fill='x')
convert_files_button.bind('<Button-1>', convert_files)
root = Tk()
window = GUI(root)
root.title('Data Merger')
root.geometry('800x600')
root.state('zoomed')
root.mainloop()
When the user clicks on the 'Convert files' button a DataFrame is created. However, it only works if the files are added in the correct order, i.e. the files to be merged share a common column. When files are not added in the correct order, I get the merge error:
I would be most grateful if someone could hint at a more elegant or error proof way of achieving my goal. Also, any hints on how to justify the DataFrame content when placed in a label widget would be great.