2

i tryed few codes, but they works too slow. We have like half million files in server which i need to loop. And result is like only 3-4 k in 5 mins :) Maybe you guys idea how to do this code loop faster? thank you in advance

Option Explicit
Sub getfiles()

    Dim oFSO As Object
    Dim oFolder As Object
    Dim oFile As Object, sf
    Dim i As Integer, colFolders As New Collection, ws As Worksheet, y As Integer
    
    Set ws = ActiveSheet
    Set oFSO = CreateObject("Scripting.FileSystemObject")
    Set oFolder = oFSO.GetFolder('my path)
    
    colFolders.Add oFolder
    
    DoEvents
    Do While colFolders.Count > 0
        Set oFolder = colFolders(1)
        colFolders.Remove 1
    
        For Each oFile In oFolder.Files
            If Right(oFile.Name, 4) = ".pdf" Then
                ws.Cells(i + 1, 1) = oFolder.Path
                ws.Cells(i + 1, 2) = oFile.Name
                i = i + 1
                y = y + 1
                If y = 2000 Then 'just saving to check result after few minutes with pause
                    ThisWorkbook.Save
                    Application.Wait (Now + TimeValue("0:00:10"))
                    y = 0
                End If
            End If
        Next oFile
        For Each sf In oFolder.SubFolders
            colFolders.Add sf
        Next sf
    Loop

End Sub
FunThomas
  • 23,043
  • 3
  • 18
  • 34

3 Answers3

3

This should be faster:

Option Explicit

Sub getPdfFiles()
    Dim i As Long, ws As Worksheet
    Set ws = ActiveSheet
    
    Dim output As Object
    Set output = ShellOutput("Dir D:\BYoung\*.pdf /s /b /a:-d")
    
    Dim inLines() As String
    Dim sLine As String, lines As Long
    
    Do While Not output.AtEndOfStream
        sLine = output.ReadLine
    
        If Right(sLine, 4) = ".pdf" Then
            i = i + 1
            ReDim Preserve inLines(1 To i)
            inLines(i) = sLine
        End If
        'If i Mod 100 = 0 Then DoEvents
    Loop
    lines = i
    
    Dim fName As String, fFull As String, fPath As String
    Dim outLines As Variant
    ReDim outLines(1 To lines, 1 To 2)
    
    For i = 1 To lines
        fFull = inLines(i)
        fName = Right(fFull, Len(fFull) - InStrRev(fFull, "\"))
        fPath = Mid(fFull, 1, Len(fFull) - Len(fName))
        outLines(i, 1) = fPath
        outLines(i, 2) = fName
        'If i Mod 100 = 0 Then DoEvents
    Next i
    
    ws.Range("A1:B" & lines) = outLines
    
    ws.Parent.Save
End Sub

Which calls this function:

' Create a Shell, executes a command, and returns the output stream
'(from @BrianBurns at https://stackoverflow.com/a/32600510/109122)
Public Function ShellOutput(sCmd As String) As Object
    Dim oShell As Object
    Set oShell = CreateObject("WScript.Shell")

    'run command
    Dim oExec As Object
    Dim oOutput As Object
    Set oExec = oShell.Exec("cmd.exe /c " & sCmd)
    Set oOutput = oExec.StdOut

    Set ShellOutput = oOutput
End Function
RBarryYoung
  • 55,398
  • 14
  • 96
  • 137
  • 1
    Great. For 350k files, it took 10s on my machine. There are a few issues though. `i` needs to be declared as long. `y` needs to be removed. To exclude a directory named `MyFiles.PDF` you could add the switch `a:-d`. `ReDim inLines(1 To 1)` is redundant since you have used the 'array parentheses' when declaring. You forgot to remove the line `ReDim Preserve inLines(1 To i + 1)`. Since you have used the `ActiveSheet` for the worksheet, for the workbook it would be more correct to use `ws.Parent` instead of `ThisWorkbook`. Could you share what exactly the purpose of `DoEvents` is in this code? – VBasic2008 Dec 12 '21 at 11:36
  • @VBasic2008 Good points, I was in a rush when I posted this (leaving for a trip) and didn't clean-up completely, I will make some changes. RE: DoEvents, I started by taking the OPs code, testing it in my environment and then transforming the necessary parts. When I do that, I usually put in `DoEvents` so that I can break out of any infinite or long-running loops, even in my own code (in case of mistake), without having to restart and recover my Spreadsheet. It's probably unnecessary now. – RBarryYoung Dec 12 '21 at 13:30
  • (I was talking about the 2nd `DoEvents`, which I added, the first one was leftover from the original code). – RBarryYoung Dec 12 '21 at 13:37
  • @VBasic2008 Ok, I've cleaned it up, there was other stuff too, most importantly that I forgot the attribution to BrianBurns for the Shell-wrapper function. This was sloppy of me, and I apologize. – RBarryYoung Dec 12 '21 at 13:46
1

Another option using Dir() for files:


Sub Tester()
   Dim matches As Collection
   
   Set matches = GetMatches("C:\Test", "*.pdf")

   'loop matches and list file info...
    
End Sub


'Return a collection of file objects given a starting folder and a file pattern
'  e.g. "*.txt"
'Pass False for last parameter if don't want to check subfolders
Function GetMatches(startFolder As String, filePattern As String, _
                    Optional subFolders As Boolean = True) As Collection

    Dim fso, fldr, f, subFldr, fpath
    Dim colFiles As New Collection
    Dim colSub As New Collection
    
    Set fso = CreateObject("scripting.filesystemobject")
    colSub.Add startFolder
    
    Do While colSub.Count > 0
        
        Set fldr = fso.getfolder(colSub(1))
        colSub.Remove 1
        
        If subFolders Then
            For Each subFldr In fldr.subFolders
                colSub.Add subFldr.Path
            Next subFldr
        End If
        
        fpath = fldr.Path
        If Right(fpath, 1) <> "\" Then fpath = fpath & "\"
        f = Dir(fpath & filePattern)'Dir is faster...
        Do While Len(f) > 0
            colFiles.Add fso.getfile(fpath & f)
            f = Dir()
        Loop
    Loop
    Set GetMatches = colFiles
End Function
Tim Williams
  • 154,628
  • 8
  • 97
  • 125
1

Write a List of Files to a Worksheet

  • Copy the code to a standard module, e.g. Module1, in the workbook with the worksheet you plan to write to.
  • Then, before running the one-liner WritePdfFoldersAndFiles procedure, carefully adjust the arguments of the called procedure (WriteFoldersAndFiles) in it.
  • To learn about the Dir parameters (switches) visit the Microsoft Docs' dir page.
Option Explicit

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      From a folder ('"C:\"') and all its subfolders ('/s'), returns
'               all PDF ("*.pdf") file names (second column) and their
'               folder paths (first column) in two columns of worksheet 'Sheet1'
'               in the workbook containing this code, starting with cell 'A2'.
'               The '/b' switch is necessary to get the file paths.
'               The 'a-d' switch is necessary to exclude directories.
' Remarks:      If you select all files (*.*) on your system drive (e.g. 'C:\'),
'               there may be a different number of files on each run due to
'               newly created logs, temp files, or whatnot.
' Calls:        'WriteFoldersAndFiles'
'                   'ArrFilePaths'
'                   'GetFoldersAndFiles'
'                       'SplitStringByLastCharToRow'
'                   'WriteDataSimple'
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Sub WritePdfFoldersAndFiles()
     
    ' Adjust (play with) the values in this line:
    WriteFoldersAndFiles "C:\", "*.pdf", "/s/b/a-d", "Sheet1", "A2"

End Sub

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      Writes a list of folder paths and file names to two columns
'               of a worksheet in the workbook containing this code.
' Calls:        'ArrFilePaths'
'               'GetFoldersAndFiles'
'                   'SplitStringByLastCharToRow'
'               'WriteDataSimple'
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Sub WriteFoldersAndFiles( _
        ByVal FolderPath As String, _
        Optional ByVal DirPattern As String = "*.*", _
        Optional ByVal DirSwitches As String = "/s/b/a-d", _
        Optional ByVal WorksheetName As String = "Sheet1", _
        Optional ByVal FirstCellAddress As String = "A1")
    Const ProcName As String = "WriteFoldersAndFiles"
    On Error GoTo ClearError
    
    Const tf As String = "0.0000"
    Dim t As Double, tt As Double, tc As Double, ti As Double
    ti = Timer: t = ti

    ' Write the file paths to an array ('fPaths').
    Dim fPaths() As String
    fPaths = ArrFilePaths(FolderPath, DirPattern, DirSwitches)
    
    tc = Timer: Debug.Print "ArrFilePaths...          " _
        & Format(tc - t, tf) & "(" & Format(tc - ti, tf) & ")": t = tc

    ' Split the file paths by the last path separator, to folder paths
    ' and file names, into a 2D one-based two-column array ('Data').
    Dim Data As Variant: Data = GetFoldersAndFiles(fPaths)
    'Erase fPaths ' probably nothing to gain
    
    tc = Timer: Debug.Print "GetFoldersAndFiles...    " _
        & Format(tc - t, tf) & "(" & Format(tc - ti, tf) & ")": t = tc

    ' Write the values from the 2D array to a worksheet in the workbook
    ' containing this code ('ThisWorkbook').
    WriteDataSimple ThisWorkbook.Worksheets(WorksheetName) _
        .Range(FirstCellAddress), Data
    
    tc = Timer: Debug.Print "WriteDataSimple...       " _
        & Format(tc - t, tf) & "(" & Format(tc - ti, tf) & ")": t = tc

ProcExit:
    Exit Sub
ClearError:
    Debug.Print "'" & ProcName & "': Unexpected Error!" & vbLf _
              & "    " & "Run-time error '" & Err.Number & "':" & vbLf _
              & "    " & Err.Description
    Resume ProcExit
End Sub

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      Returns the file paths of the files in a folder in an array.
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Function ArrFilePaths( _
    ByVal FolderPath As String, _
    Optional ByVal FilePattern As String = "*.*", _
    Optional ByVal DirSwitches As String = "/s/b/a-d") _
As Variant
    Const ProcName As String = "ArrFilePaths"
    On Error GoTo ClearError
    
    Dim pSep As String: pSep = Application.PathSeparator
    If Right(FolderPath, 1) <> pSep Then FolderPath = FolderPath & pSep
    Dim ExecString As String ' '%comspec%' or 'cmd.exe' ?
    ExecString = "%comspec% /c Dir """ _
        & FolderPath & FilePattern & """ " & DirSwitches
    ArrFilePaths = Split(CreateObject("WScript.Shell") _
        .Exec(ExecString).StdOut.ReadAll, vbCrLf)

ProcExit:
    Exit Function
ClearError:
    Debug.Print "'" & ProcName & "': Unexpected Error!" & vbLf _
              & "    " & "Run-time error '" & Err.Number & "':" & vbLf _
              & "    " & Err.Description
    Resume ProcExit
End Function

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      From an array containing file paths, returns
'               the folder paths and the file names in two columns
'               of a 2D one-based two-column array.
' Calls:        'SplitStringByLastCharToRow'.
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Function GetFoldersAndFiles( _
    ByVal FilePaths As Variant) _
As Variant
    Const ProcName As String = "GetFoldersAndFiles"
    On Error GoTo ClearError

    Dim pSep As String: pSep = Application.PathSeparator
    Dim nLB As Long: nLB = LBound(FilePaths)
    Dim nUB As Long: nUB = UBound(FilePaths)
    
    Dim Data() As String: ReDim Data(1 To nUB - nLB, 1 To 2)
    
    Dim r As Long
    Dim n As Long
    
    For n = nLB To nUB - 1 ' last item is an empty string
        r = r + 1
        SplitStringByLastCharToRow Data, r, FilePaths(n), pSep
    Next n
    
    GetFoldersAndFiles = Data

ProcExit:
    Exit Function
ClearError:
    Debug.Print "'" & ProcName & "': Unexpected Error!" & vbLf _
              & "    " & "Run-time error '" & Err.Number & "':" & vbLf _
              & "    " & Err.Description
    Resume ProcExit
End Function

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      Writes the values from a 2D one-based array ('Data') to a range
'               defined by its first cell ('FirstCell') and by the size
'               of the array. Optionally (by default), previously
'               clears the columns (preserving the data above (e.g. headers)).
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Sub WriteDataSimple( _
        ByVal FirstCell As Range, _
        ByVal Data As Variant, _
        Optional ByVal DoClearContents As Boolean = True)
    Const ProcName As String = "WriteDataSimple"
    On Error GoTo ClearError
    
    With FirstCell.Resize(, UBound(Data, 2))
        If DoClearContents Then
            .Resize(.Worksheet.Rows.Count - .Row + 1).Clear
        End If
        .Resize(UBound(Data, 1)).Value = Data
    End With

ProcExit:
    Exit Sub
ClearError:
    Debug.Print "'" & ProcName & "': Unexpected Error!" & vbLf _
              & "    " & "Run-time error '" & Err.Number & "':" & vbLf _
              & "    " & Err.Description
    Resume ProcExit
End Sub

''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
' Purpose:      Splits a string ('SplitString') by the last occurrence
'               of a character ('SplitChar') and writes the two split strings
'               to the first two columns in a row ('DataRow')
'               of a 2D one-based array ('Data').
''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''''
Sub SplitStringByLastCharToRow( _
        ByRef Data As Variant, _
        ByVal DataRow As Long, _
        ByVal SplitString As String, _
        ByVal SplitChar As String)
    Const ProcName As String = "SplitStringByLastCharToRow"
    On Error GoTo ClearError
    
    Dim lcPos As Long: lcPos = InStrRev(SplitString, SplitChar)
    If lcPos = 0 Then Exit Sub
    
    Data(DataRow, 1) = Mid(SplitString, 1, lcPos - 1)
    Data(DataRow, 2) = Mid(SplitString, lcPos + 1, Len(SplitString) - lcPos)
'    Data(DataRow, 1) = Left(SplitString, lcPos - 1)
'    Data(DataRow, 2) = Right(SplitString, Len(SplitString) - lcPos)

ProcExit:
    Exit Sub
ClearError:
    Debug.Print "'" & ProcName & "': Unexpected Error!" & vbLf _
              & "    " & "Run-time error '" & Err.Number & "':" & vbLf _
              & "    " & Err.Description
    Resume ProcExit
End Sub
VBasic2008
  • 44,888
  • 5
  • 17
  • 28