I am trying make a PyQt5 GUI that allows:
- Launch of a UI file selector on button click, filters for .csv files.
- If a file is selected, load the file into a pandas data frame.
- For each data frame column, access the unique elements of that data frame column and create a QListWidget containing the unique values of that column.
- Each QListWidget is added into a QGridLayout that is also a scrollable area.
So far, I have been able to do just that. When attempt to scale up the functionality to .csv files containing ~1M rows, as one could expect there is a performance hit. To get around the slow down I am attempting to instantiate QListWidget objects that are added into a QGridLayout via a concurrent.futures ThreadPoolExecutor. Using the ThreadPoolExecutor, I get no errors and none of the debug prints indicate that anything is unexpectedly.
In the method DataManager::load_data there is a call to DataManager::make_data_filters which works just fine. However, switching the call to DataManager::make_data_filters2 or DataManager::make_data_filters3 results in a much faster response but no QListWidgets are added/visible inside the QGridLayout.
I presume for testing that any standard/tabular .csv file will work, that is the intent after all. My main goal is to speed things up for large .csv files. Any comments, suggestions, fixes, or alternative approaches are welcomed, thanks. The code below contains everything that you need, except for a standard tabular .csv file. For my own sake, I just downloaded some dummy data from the internet.
import os
import sys
import pandas as pd
from PyQt5 import QtCore, QtGui, QtWidgets
from concurrent.futures import ThreadPoolExecutor
class ixListBox(QtWidgets.QWidget):
def __init__(self, label_str, values, callback_selection, *args, **kwargs):
# Call the parent constructor
super().__init__(*args, **kwargs)
# Track the selection callback
self.callback_selection = callback_selection
# Initialize the widget layout
self.init_widget_layout()
# Initialize the dtaa filter space
self.init_label_space( label_str=label_str )
# Initialize the button space
self.init_listbox_space( values=values )
def init_widget_layout(self):
# Create a layout manager for the central frame/widget
self.central_layout = QtWidgets.QVBoxLayout()
# Connect the layout to the frame
self.setLayout( self.central_layout )
def init_label_space(self, label_str):
# Initialize a label
label = QtWidgets.QLabel()
label.setText( label_str )
label.setAlignment( QtCore.Qt.AlignCenter )
# Add label to widget
self.central_layout.addWidget( label )
def init_listbox_space(self, values):
# Initialize a listbox
self.listbox = QtWidgets.QListWidget()
# Set widget to allow multiselection
self.listbox.setSelectionMode( QtWidgets.QAbstractItemView.MultiSelection )
# Connect the listbox to a selection callback
self.listbox.currentItemChanged.connect( self.callback_selection )
# Add the values to the listbox
self.listbox.addItems( values )
# Add listbox to widget
self.central_layout.addWidget( self.listbox )
class DataManager(QtWidgets.QDockWidget):
def __init__(self, label='Data Manager', *args, **kwargs):
# Call the parent constructor
super().__init__(*args, **kwargs)
# Set the title bar
self.setWindowTitle( label )
# Initialize widget default values
self.init_widget_values()
# Configure widget properties
self.init_widget_properties()
# Initialize the widget layout
self.init_widget_layout()
def init_widget_values(self):
# Variable to track widget visibility
self.is_visible = True
def init_widget_properties(self):
# Set whether the widget is initially docked
self.setFloating( False )
# Connect the dock to a visibility callback
self.setVisible( self.is_visible )
# Set the allowed area that can be docked into
self.setAllowedAreas( QtCore.Qt.TopDockWidgetArea | QtCore.Qt.BottomDockWidgetArea )
# Set the enabled features of the widget
self.setFeatures( QtWidgets.QDockWidget.DockWidgetMovable | QtWidgets.QDockWidget.DockWidgetFloatable )
def init_widget_layout(self):
# Create a central frame
self.central_frame = QtWidgets.QFrame()
# Create a layout manager for the central frame/widget
self.central_layout = QtWidgets.QVBoxLayout()
# Connect the layout to the frame
self.central_frame.setLayout( self.central_layout )
# Connect the layout manager to the central frame
self.setWidget( self.central_frame )
# Initialize the dtaa filter space
self.init_data_filter_space()
# Initialize the button space
self.init_button_space()
def init_data_filter_space(self):
# Initialize a scrollable area
scrollArea = QtWidgets.QScrollArea()
scrollArea.setWidgetResizable( True )
scrollAreaWidgetContents = QtWidgets.QWidget()
scrollArea.setWidget( scrollAreaWidgetContents )
# Create a layout manager for the central frame/widget
self.layout_filters = QtWidgets.QGridLayout( scrollAreaWidgetContents )
# Add the button space frame to the layout
self.central_layout.addWidget( scrollArea )
def init_button_space(self):
# Create a central frame
frame = QtWidgets.QFrame()
# Create a layout manager for the central frame/widget
layout = QtWidgets.QHBoxLayout()
# Connect the layout manager to the central frame
frame.setLayout( layout )
# Add the button space frame to the layout
self.central_layout.addWidget( frame )
# Initialize a load data button
self.make_button( layout=layout, label='Load Data', callback=self.load_data )
# Initialize a save data button
self.make_button( layout=layout, label='Save Data', callback=self.save_data )
def make_button(self, layout, label, callback):
# Initialize a button
button = QtWidgets.QPushButton()
button.setText( label )
button.clicked.connect( callback )
# Add the button to the layout
layout.addWidget( button )
def load_data(self):
# Set the UI properties
options = QtWidgets.QFileDialog.Options()
options |= QtWidgets.QFileDialog.DontUseNativeDialog
options |= QtWidgets.QFileDialog.DontUseCustomDirectoryIcons
# Launch a file selection UI
file_name, _ = QtWidgets.QFileDialog.getOpenFileName( self, 'Load Data File', os.getcwd(), 'Excel files (*.csv)' )
# Check if a selection was made
if not file_name:
return
# Load the data into a data frame
self.df = pd.read_csv( file_name )
# Load and make data filters based on file contents
self.make_data_filters3()
def save_data(self):
print('SAVE DATA')
def make_data_filters(self):
# Get the data column keys
row_indx = col_indx = 0
for column_key in self.df.columns:
# Get the unique values for the current column
values = self.df[ column_key ].astype( str ).unique().tolist()
# Initialize a list box to track the unique values
listbox = ixListBox( label_str=column_key, values=values, callback_selection=self.callback_filter_selection )
# Add the listbox widget to the layout
self.layout_filters.addWidget( listbox, row_indx, col_indx, 1, 1 )
col_indx += 1
# Check for new row condition
if col_indx > 4:
row_indx += 1
col_indx = 0
def make_data_filters2(self):
# Build the listbox instances in parallel
with ThreadPoolExecutor( os.cpu_count() - 1 ) as executor:
listboxes = executor.map( self.get_listbox_filter, [ column_key for column_key in self.df.columns ] )
# Get the data column keys
row_indx = col_indx = 0
for listbox in listboxes:
# Add the listbox widget to the layout
self.layout_filters.addWidget( listbox, row_indx, col_indx, 1, 1 )
col_indx += 1
# Check for new row condition
if col_indx > 4:
row_indx += 1
col_indx = 0
print(listbox, row_indx, col_indx)
def make_data_filters3(self):
# Pre-calculate the listbox positions
row_indx = col_indx = 0
widget_positions = []
for column_key in self.df.columns:
# Track the widget position
widget_positions.append( ( row_indx, col_indx ) )
# Increment the column count
col_indx += 1
# Check for new row condition
if col_indx > 4:
row_indx += 1
col_indx = 0
# Build the listbox instances in parallel
with ThreadPoolExecutor( os.cpu_count() - 1 ) as executor:
executor.map( self.get_listbox_filter2, [ widget_spec
for widget_spec in zip( self.df.columns, widget_positions ) ] )
def get_listbox_filter(self, column_key):
# Get the unique values for the current column
values = self.df[ column_key ].astype( str ).unique().tolist()
# Initialize a list box to track the unique values
return ixListBox( label_str=column_key, values=values, callback_selection=self.callback_filter_selection )
def get_listbox_filter2(self, widget_spec):
# Extract the inputs
column_key = widget_spec[ 0 ]
row_indx, col_indx = widget_spec[ 1 ]
# Get the unique values for the current column
values = self.df[ column_key ].astype( str ).unique().tolist()
# Initialize a list box to track the unique values
listbox = ixListBox( label_str=column_key, values=values, callback_selection=self.callback_filter_selection )
# Add the listbox widget to the layout
self.layout_filters.addWidget( listbox, row_indx, col_indx, 1, 1 )
print(listbox, row_indx, col_indx)
def callback_filter_selection(self):
print('callback_filter_selection')
def toggle_visibility(self):
# Toggle the visibility state
self.is_visible = not self.is_visible
self.setVisible( self.is_visible )
class RootWin(QtWidgets.QMainWindow):
def __init__(self, *args, **kwargs):
# Call the parent constructor
super().__init__(*args, **kwargs)
# Initialize the application layout
self.init_app_layout()
# Initialize the application widgets
self.init_app_widgets()
def init_app_layout(self):
# Create a central frame
self.central_frame = QtWidgets.QFrame()
# Create a layout manager for the central frame/widget
self.central_layout = QtWidgets.QVBoxLayout()
# Connect the layout manager to the central frame
self.central_frame.setLayout( self.central_layout )
def init_app_widgets(self):
# Initialize a DataManager instance
self.data_manager = DataManager( label='Data Manager' )
# Add the data manager to the application
self.addDockWidget( QtCore.Qt.TopDockWidgetArea, self.data_manager )
# Connect the dock widget to a key press callback
self.shortcut = QtWidgets.QShortcut( QtGui.QKeySequence( 'Ctrl+D' ), self )
self.shortcut.activated.connect( self.data_manager.toggle_visibility )
def finalize_window(self):
# Set the central widget of the application
self.setCentralWidget( self.central_frame )
def main():
# Check for an existing Qt app instance
if not QtWidgets.QApplication.instance():
app = QtWidgets.QApplication( sys.argv )
else:
app = QtWidgets.QApplication.instance()
# Create a widget instance
main_app = RootWin()
# Show the application
main_app.show()
# Run the main event loop of the application
sys.exit( app.exec_() )
if __name__ == '__main__':
# Call the main routine
main()