What I need:
I often need to rearrange multidimensional arrays, especially with timestamps. For that I need a routine, that results in a permanent sort order. Since the data can be huge, it has to be performant as possible.
I would like to have some feedback to my current efforts. I'm trying to understand sorting arrays practical. I'm not a programmer, if possible be patient. :)
I'll appreciate every help/tips! I'm going to learn some new things maybe.
What my efforts are so far:
For the beginning I took the bubble sort algorithm. It does what is needed, BUT its performance is very low. It needs more than 20 seconds for sorting a column within 582 rows and 114 columns.
The code works with single- and multi-column-arrays. I use regular expressions, so keep in mind the little function a the end of the code.
I've commented my code step by step, I hope its still readable.
I know QuickSort would be much faster, but I haven't understand to make this algorithm permanent/stable yet. I've found this solution Sorting a multidimensionnal array in VBA, but as said, its not permanent.
Especially for Excel I know the way to copy an array to a worksheet and to sort it there. My goal is to avoid this solution. :)
'++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
'+
'+ BubbleSort_Array
'+
'+ Sort algorithm: BubbleSort
'+ Sorts by: 1. numbers, 2. dates, 3. Zeichenketten (also with consecutive number, e.g. "Book1,Book2,Book3..."; Capital letters before small letters)
'+ Parameter "Data": Requires an array (VARIANT) with one or more columns and rows, by reference
'+ Paramater "Column" is a LONG, follows the counting by "Option Base 0" (first column = 0)
'+ Parameter "Direction" is an EXCEL-based constant, that determines the sortdirection (ascending/descending)
'+
'++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Public Sub BubbleSort_Array( _
ByRef Data() As Variant, _
Optional Column As Long = -1, _
Optional Direction As XlSortOrder = 1 _
)
Dim InnerIndex As Long 'common variable, for the inner loop
Dim OuterIndex As Long 'common variable, for the outer loop
Dim SwapItem As Variant 'variable to temporarily save content, that could be swapped with another item
Dim SwapItem2 As Variant 'variable to temporarily save content, that could be swapped with another item
Dim ErrNum As Long 'variable for error number of the ERR-object
Dim lngRow As Long 'common variable for the rows of an array
Dim lngColumn As Long 'common variable for the column of an array
Dim colNumber As New Collection 'variable to save a part of digits from an entry
Dim colText As New Collection 'variable to save a part of text from an entry
Dim colDates As New Collection 'variable to save dates from an entry
Dim SortIndex() As Variant 'array for sorting and mapping the specified COLUMN
Dim CopyData() As Variant 'array for the original data, but sorted
'Check, whether the given array is a one- or multi-column array
On Error Resume Next
ErrNum = UBound(Data, 2)
ErrNum = Err.Number
On Error GoTo 0
'If there is an error and the parameter COLUMN is still -1 the parameter DATA is an one-column-array
If ErrNum > 0 And Column = -1 Then
'Outer loop
For OuterIndex = LBound(Data) To UBound(Data)
'Inner loop
For InnerIndex = LBound(Data) To UBound(Data)
'Execute the following statement as long the current index is not the last one (it would throw an error 9 by trying to access the next item)
If InnerIndex < UBound(Data) Then
'To differentiate between the values
'Check, whether the value and the next value are dates
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
'Save the dates in a temporary collection
colDates.Add VBA.CDate(Data(InnerIndex)), "date1"
colDates.Add VBA.CDate(Data(InnerIndex + 1)), "date2"
Else
'If both values are not dates, split the value in case it is a STRING with an number at the end
'like "Paper1", "Paper2" etc.
colNumber.Add RegEx_Replace(Data(InnerIndex), ".*(\d+$)", "$1"), "current"
colNumber.Add RegEx_Replace(Data(InnerIndex + 1), ".*(\d+$)", "$1"), "next"
colText.Add RegEx_Replace(Data(InnerIndex), "(.*)\d+$", "$1"), "current"
colText.Add RegEx_Replace(Data(InnerIndex + 1), "(.*)\d+$", "$1"), "next"
End If
'Check, whether the sortdirection is ascending
If Direction = xlAscending Then
'Sort by date
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
'Check the items depending from the sortdirection
If VBA.CDbl(colDates("date1")) > VBA.CDbl(colDates("date2")) Then
'In case the first item is bigger then the second, swap the items
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
'Sort by strings with consecutive number
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) _
And (colText("current") = colText("next")) Then
'In case the first item is bigger then the second, swap the items
If colNumber("current") > colNumber("next") Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
Else
'Sort by strings
'In case the first item is bigger then the second, swap the items
If Data(InnerIndex) > Data(InnerIndex + 1) Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
End If
'Sort descending
Else
'Sort descending
'Sort by date
If VBA.IsDate(Data(InnerIndex)) And VBA.IsDate(Data(InnerIndex + 1)) Then
If VBA.CDbl(colDates("date1")) < VBA.CDbl(colDates("date2")) Then
'In case the first item is smaller then the second, swap the items
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
'Sort by strings with consecutive number
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) And _
(colText("current") = colText("next")) Then
'In case the first item is smaller then the second, swap the items
If colNumber("current") < colNumber("next") Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
Else
'Sort by strings
'In case the first item is smaller then the second, swap the items
If Data(InnerIndex) < Data(InnerIndex + 1) Then
SwapItem = Data(InnerIndex)
Data(InnerIndex) = Data(InnerIndex + 1)
Data(InnerIndex + 1) = SwapItem
End If
End If
End If
End If
Set colNumber = Nothing
Set colText = Nothing
Set colDates = Nothing
Next
Next
Else
'Resize the array SortIndex for sorting the specified COLUMN
'Needs two columns: One for the index of the original data, and one for the values to be sorted
ReDim SortIndex(UBound(Data, 1), 1)
For InnerIndex = LBound(Data, 1) To UBound(Data, 1)
'Save index of the original data
SortIndex(InnerIndex, 0) = InnerIndex
'Save values of the specified COLUMN
SortIndex(InnerIndex, 1) = Data(InnerIndex, Column)
Next
'Outer loop
For OuterIndex = LBound(SortIndex, 1) To UBound(SortIndex, 1)
'Inner loop
For InnerIndex = LBound(SortIndex, 1) To UBound(SortIndex, 1)
'Execute the following statement as long the current index is not the last one (it would throw an error 9 by trying to access the next item)
If InnerIndex < UBound(SortIndex, 1) Then
'To differentiate between the values
'Check, whether the value and the next value are dates
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
'Save the dates in a temporary collection
colDates.Add VBA.CDate(SortIndex(InnerIndex, 1)), "date1"
colDates.Add VBA.CDate(SortIndex(InnerIndex + 1, 1)), "date2"
Else
'If both values are not dates, split the value in case it is a STRING with an number at the end
'like "Paper1", "Paper2" etc.
colNumber.Add RegEx_Replace(SortIndex(InnerIndex, 1), ".*(\d+$)", "$1"), "current"
colNumber.Add RegEx_Replace(SortIndex(InnerIndex + 1, 1), ".*(\d+$)", "$1"), "next"
colText.Add RegEx_Replace(SortIndex(InnerIndex, 1), "(.*)\d+$", "$1"), "current"
colText.Add RegEx_Replace(SortIndex(InnerIndex + 1, 1), "(.*)\d+$", "$1"), "next"
End If
'Check the sortdirection
If Direction = xlAscending Then
'Sort by date
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
If VBA.CDbl(colDates("date1")) > VBA.CDbl(colDates("date2")) Then
'In case the first item is bigger then the second, swap the items
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
'Sort by strings with consecutive numbers
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) _
And (colText("current") = colText("next")) Then
'In case the first item is bigger then the second, swap the items
If colNumber("current") > colNumber("next") Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
Else
'Sort by strings
'In case the first item is bigger then the second, swap the items
If SortIndex(InnerIndex, 1) > SortIndex(InnerIndex + 1, 1) Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
End If
Else
'Sort descending
'Sort by dates
If VBA.IsDate(SortIndex(InnerIndex, 1)) And VBA.IsDate(SortIndex(InnerIndex + 1, 1)) Then
'In case the first item is smaller then the second, swap the items
If VBA.CDbl(colDates("date1")) < VBA.CDbl(colDates("date2")) Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
'Sort by strings with consecutive numbers
ElseIf VBA.IsNumeric(colNumber("current")) And VBA.IsNumeric(colNumber("next")) And _
(colText("current") = colText("next")) Then
'In case the first item is smaller then the second, swap the items
If colNumber("current") < colNumber("next") Then
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
Else
'Sort by strings
If SortIndex(InnerIndex, 1) < SortIndex(InnerIndex + 1, 1) Then
'In case the first item is smaller then the second, swap the items
SwapItem = SortIndex(InnerIndex, 0)
SwapItem2 = SortIndex(InnerIndex, 1)
SortIndex(InnerIndex, 0) = SortIndex(InnerIndex + 1, 0)
SortIndex(InnerIndex, 1) = SortIndex(InnerIndex + 1, 1)
SortIndex(InnerIndex + 1, 0) = SwapItem
SortIndex(InnerIndex + 1, 1) = SwapItem2
End If
End If
End If
End If
Set colNumber = Nothing
Set colText = Nothing
Set colDates = Nothing
Next
Next
'Resize a new array with the same size like the original DATA
ReDim CopyData(UBound(Data, 1), UBound(Data, 2))
'Write the data according to the array SortIndex (= sorts the whole original data)
For lngRow = LBound(Data, 1) To UBound(Data, 1)
For lngColumn = LBound(Data, 2) To UBound(Data, 2)
CopyData(lngRow, lngColumn) = Data(SortIndex(lngRow, 0), lngColumn)
Next
Next
'Overwrite the original data with the sorted data
For lngRow = LBound(Data, 1) To UBound(Data, 1)
For lngColumn = LBound(Data, 2) To UBound(Data, 2)
Data(lngRow, lngColumn) = CopyData(lngRow, lngColumn)
Next
Next
End If
End Sub
'+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
'+
'+ RegEx_Replace
'+
'+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Public Function RegEx_Replace( _
varString As Variant, _
strSearchPattern As String, _
strReplaceString As String, _
Optional blnCase_Insensitive As Boolean = True, _
Optional blnGlobalSearch As Boolean = True, _
Optional blnMultiLine As Boolean = False _
) As String
Dim RegEx As Object
Set RegEx = CreateObject("vbscript.regexp")
With RegEx
.IgnoreCase = blnCase_Insensitive
.Global = blnGlobalSearch
.MultiLine = blnMultiLine
.Pattern = strSearchPattern
End With
RegEx_Replace = RegEx.Replace(varString, strReplaceString)
End Function