EDIT: answer is updated to change the grouping to above and to correct a condition that incorrectly indented and grouped the rows.
So it was an interesting problem to solve. In addition to the actual solution, I have some other tips that I normally include in my code that I'll call out as well. My solution is VERY fast as well. When I parsed the C:\Program Files\ directory tree (18,017 files), it ran in under 5 seconds.
- Declare your variables as close as possible to the point where they are used for the first time. This makes it much easier to determine the variable type and definition, and also helps to functionally group the code.
- Those logical groups can then be functionally isolated into separate functions and subs. This will make the main logic of your code much easier to grasp in a single quick view, rather than requiring the reader (probably YOU in a few months) to re-read large logic sections and digest it in order to understand it.
In my example code, I start off with three quick functions that tell you exactly what's going on:
Dim rootFolder As String
rootFolder = SelectFolder
Dim pathArray As Variant
pathArray = GetAllFiles(rootFolder)
Dim folderGroups As Object
Set folderGroups = BuildFolderDictionary(pathArray)
The first function is straightforward and closely follows your approach to selecting the root folder:
Private Function SelectFolder() As String
'--- returns the user-selected folder as a string
Dim objShell As Object
Dim objFolder As Object
Set objShell = CreateObject("Shell.Application")
Set objFolder = objShell.BrowseForFolder(0, "Select Folder", 0, 17)
If Not objFolder Is Nothing Then
SelectFolder = objFolder.self.path
End If
End Function
The next function (GetAllFiles
) also uses your approach but instead of putting the results directly into the worksheet, it keeps the results in a memory-based array (at the bottom of this answer, I include the whole module in a single code block for copy/paste later):
Private Function GetAllFiles(ByVal rootPath As String, _
Optional onlyFolders As Boolean = False) As Variant
'--- returns a sorted array of all filepaths in the given directory path
Dim dirOptions As String
If onlyFolders Then
dirOptions = """ /a:d-h-s /b /s"
Else
dirOptions = """ /a:-h-s /b /s"
End If
Dim fOut() As String
fOut = Split(CreateObject("WScript.Shell").exec("cmd /c dir """ & _
rootPath & _
dirOptions).StdOut.ReadAll, _
vbNewLine)
QuickSort fOut, LBound(fOut), UBound(fOut)
'--- the pathArray skips the first position from the fOut array
' because it's always blank, but add the root folder as the first entry
Dim pathArray As Variant
ReDim pathArray(1 To UBound(fOut) + 1, 1 To 1)
pathArray(1, 1) = rootPath
Dim i As Long
For i = 2 To UBound(fOut) + 1
pathArray(i, 1) = fOut(i - 1)
Next i
GetAllFiles = pathArray
End Function
Memory-based arrays are MUCH, MUCH faster than working directly off the worksheet with Cells
or Ranges
.
The next function (BuildFolderDictionary
) works using the array of paths and works to build a list (a Dictionary
) of unique folders within the folder hierarchy. Along the way, it also creates a "span" of the rows that the subfolder encompasses. This will be very useful later. Remember, we're doing all this in memory so it's quick.
Private Function BuildFolderDictionary(ByRef paths As Variant) As Object
Dim folders As Object
Set folders = CreateObject("Scripting.Dictionary")
'--- scan all paths and create a dictionary of each folder and subfolder
' noting which items (rows) map into each dictionary
Dim i As Long
For i = LBound(paths) To UBound(paths)
Dim folder As String
Dim pos1 As Long
If Not IsEmpty(paths(i, 1)) Then
pos1 = InStrRev(paths(i, 1), "\") 'find the last folder separator
folder = Left$(paths(i, 1), pos1)
If Not folders.Exists(folder) Then
'--- new (sub)folder, create a new entry
folders.Add folder, CStr(i) & ":" & CStr(i)
Else
'--- extisting (sub)folder, add to the row range
Dim rows As String
rows = folders(folder)
rows = Left$(rows, InStr(1, rows, ":"))
rows = rows & CStr(i)
folders(folder) = rows
End If
End If
Next i
'--- final fixup: the root folder group should always encompass all
' the entries (runs from the second row to the end)...
' and we'll also determine the indent level using the first entry
' as the baseline (level 1). stored as "rows,level" e.g. "2:7,1"
Dim rootSlashes As Long
rootSlashes = Len(root) - Len(Replace(root, "\", "")) - 1
folders(root) = "2:" & UBound(paths) & ",1"
Dim slashes As Long
folder = folders.Keys
For i = 1 To UBound(folder)
slashes = Len(folder(i)) - Len(Replace(folder(i), "\", ""))
folders(folder(i)) = folders(folder(i)) & "," & _
CStr(slashes - rootSlashes)
Next i
For Each folder In folders
Debug.Print folder & " - " & folders(folder)
Next folder
Set BuildFolderDictionary = folders
End Function
The final two parts are to copy the memory array (of file paths) to the worksheet...
Const START_ROW As Long = 6
Dim pathRange As Range
Set pathRange = Sheet1.Range("A" & START_ROW).Resize(UBound(pathArray) + 1, 1)
pathRange = pathArray
and then apply both the indentation and grouping of the rows. We're using the dictionary of folder groups we created that has all of the subfolder rows nicely defined for us already...
Const MAX_GROUP_LEVEL As Long = 8
Dim rowGroup As Variant
Dim level As Long
Dim folderData As Variant
Dim theseRows As String
For Each rowGroup In folderGroups
folderData = Split(folderGroups(rowGroup), ",")
theseRows = folderData(0)
level = folderData(1)
With pathRange.rows(theseRows)
.IndentLevel = level
If level < MAX_GROUP_LEVEL Then
.Group
End If
End With
Next rowGroup
(I ran into an issue during testing when the program errored with a group level deeper than 8. So I put a limit in the logic to prevent the error.)
So now, the whole module in a single block:
Option Explicit
Public Sub ShowFilePaths()
Dim rootFolder As String
rootFolder = SelectFolder
If rootFolder = vbNullString Then Exit Sub
'--- quick fixup if needed
rootFolder = rootFolder & IIf(Right$(rootFolder, 1) = "\", vbNullString, "\")
Dim pathArray As Variant
pathArray = GetAllFiles(rootFolder)
Dim folderGroups As Object
Set folderGroups = BuildFolderDictionary(rootFolder, pathArray)
'--- when debugging, this block just clears the worksheet to make it
' easier to rerun and test the code
On Error Resume Next
With Sheet1
.UsedRange.ClearOutline
.UsedRange.Clear
.Outline.SummaryRow = xlAbove
End With
Err.Clear
On Error GoTo 0
'--- copy the array to the worksheet
Const START_ROW As Long = 6
Dim pathRange As Range
Set pathRange = Sheet1.Range("A" & START_ROW).Resize(UBound(pathArray), 1)
pathRange = pathArray
'------ now apply the indention levels to each line on the sheet
' and group the same rows
Const MAX_GROUP_LEVEL As Long = 8
Dim rowGroup As Variant
Dim level As Long
Dim folderData As Variant
Dim theseRows As String
For Each rowGroup In folderGroups
folderData = Split(folderGroups(rowGroup), ",")
theseRows = folderData(0)
level = folderData(1)
With pathRange.rows(theseRows)
.IndentLevel = level
If level < MAX_GROUP_LEVEL Then
.Group
End If
End With
Next rowGroup
End Sub
Private Function SelectFolder() As String
'--- returns the user-selected folder as a string
Dim objShell As Object
Dim objFolder As Object
Set objShell = CreateObject("Shell.Application")
Set objFolder = objShell.BrowseForFolder(0, "Select Folder", 0, 17)
If Not objFolder Is Nothing Then
SelectFolder = objFolder.self.Path
End If
End Function
Private Function GetAllFiles(ByVal rootPath As String, _
Optional onlyFolders As Boolean = False) As Variant
'--- returns a sorted array of all filepaths in the given directory path
Dim dirOptions As String
If onlyFolders Then
dirOptions = """ /a:d-h-s /b /s"
Else
dirOptions = """ /a:-h-s /b /s"
End If
Dim fOut() As String
fOut = Split(CreateObject("WScript.Shell").exec("cmd /c dir """ & _
rootPath & _
dirOptions).StdOut.ReadAll, _
vbNewLine)
QuickSort fOut, LBound(fOut), UBound(fOut)
'--- the pathArray skips the first position from the fOut array
' because it's always blank, but add the root folder as the first entry
Dim pathArray As Variant
ReDim pathArray(1 To UBound(fOut) + 1, 1 To 1)
pathArray(1, 1) = rootPath
Dim i As Long
For i = 2 To UBound(fOut) + 1
pathArray(i, 1) = fOut(i - 1)
Next i
GetAllFiles = pathArray
End Function
Private Function BuildFolderDictionary(ByVal root As String, _
ByRef paths As Variant) As Object
Dim folders As Object
Set folders = CreateObject("Scripting.Dictionary")
'--- scan all paths and create a dictionary of each folder and subfolder
' noting which items (rows) map into each dictionary
Dim folder As Variant
Dim i As Long
For i = LBound(paths) To UBound(paths)
Dim pos1 As Long
If Not IsEmpty(paths(i, 1)) Then
pos1 = InStrRev(paths(i, 1), "\") 'find the last folder separator
folder = Left$(paths(i, 1), pos1)
If Not folders.Exists(folder) Then
'--- new (sub)folder, create a new entry
folders.Add folder, CStr(i) & ":" & CStr(i)
Else
'--- extisting (sub)folder, add to the row range
Dim rows As String
rows = folders(folder)
rows = Left$(rows, InStr(1, rows, ":"))
rows = rows & CStr(i)
folders(folder) = rows
End If
End If
Next i
'--- final fixup: the root folder group should always encompass all
' the entries (runs from the second row to the end)...
' and we'll also determine the indent level using the first entry
' as the baseline (level 1). stored as "rows,level" e.g. "2:7,1"
Dim rootSlashes As Long
rootSlashes = Len(root) - Len(Replace(root, "\", "")) - 1
folders(root) = "2:" & UBound(paths) & ",1"
Dim slashes As Long
folder = folders.Keys
For i = 1 To UBound(folder)
slashes = Len(folder(i)) - Len(Replace(folder(i), "\", ""))
folders(folder(i)) = folders(folder(i)) & "," & _
CStr(slashes - rootSlashes)
Next i
For Each folder In folders
Debug.Print folder & " - " & folders(folder)
Next folder
Set BuildFolderDictionary = folders
End Function
Private Sub QuickSort(ByRef Field() As String, ByVal LB As Long, ByVal UB As Long)
'--- from https://stackoverflow.com/a/152333/4717755
Dim P1 As Long, P2 As Long, Ref As String, TEMP As String
P1 = LB
P2 = UB
Ref = Field((P1 + P2) / 2)
Do
Do While (Field(P1) < Ref)
P1 = P1 + 1
Loop
Do While (Field(P2) > Ref)
P2 = P2 - 1
Loop
If P1 <= P2 Then
TEMP = Field(P1)
Field(P1) = Field(P2)
Field(P2) = TEMP
P1 = P1 + 1
P2 = P2 - 1
End If
Loop Until (P1 > P2)
If LB < P2 Then Call QuickSort(Field, LB, P2)
If P1 < UB Then Call QuickSort(Field, P1, UB)
End Sub