0

I'd like to walk a tree and end up with a list containing at most one file from each directory. The only criterion for the selected files is that they match a glob pattern, for example "*.txt".

In case it's not already implied, I need the algorithm to return stable results.

e.g. Given a tree that looks like this:

a/some/entry/foo.html
a/some/entry/foo.txt
a/some/entry/bar.txt
a/some/entry/baz.txt
a/some/entry/baz.bmp
a/some/boo.bat
a/some/boo.txt
a/some/bat.txt
a/other/path/far.txt
a/other/path/faz.txt

One acceptable result would be:

"a/some/entry/bar.txt a/other/path/far.txt a/some/boo.txt"

It seems like GLOB_RECURSE and find_file aren't appropriate for this task, so is there another approach that I should consider?

Brian Cain
  • 14,403
  • 3
  • 50
  • 88
  • 1
    What is the criteria for which file you choose from each directory? The newest/oldest file? First alphabetically? – Kevin Jun 06 '19 at 14:57
  • "What is the criteria" -- good question, any is acceptable as long as they match a given pattern. – Brian Cain Jun 11 '19 at 05:47

3 Answers

2

The list returned by file (GLOB_RECURSE ...) is a superset of the desired result. Thus one possible solution is to post-process the returned list by selecting only the first match returned for each directory.

The following CMake function sketches the necessary steps:

# glob_recurse_first(<globbing-expression> <result-var>)
#
# Recursively globs for files matching _globbingExpression and returns a list
# that contains exactly one match per directory: the first one in sorted
# order, so repeated runs over the same tree select the same files.
#
# Arguments:
#   _globbingExpression - expression passed verbatim to file(GLOB_RECURSE).
#   _resultVar          - name of the variable in the caller's scope that
#                         receives the list; it is set to an empty list when
#                         nothing matches.
#
# Example:
#   glob_recurse_first("a/*.txt" _fileList)
function (glob_recurse_first _globbingExpression _resultVar)
    file (GLOB_RECURSE _pathList "${_globbingExpression}")
    # canonical order, which makes the selection reproducible
    list (SORT _pathList)
    # keep the first match from each directory
    set (_firstMatched "")
    set (_seenDirs "")
    foreach (_path IN LISTS _pathList)
        get_filename_component(_dir "${_path}" DIRECTORY)
        # Files of one directory are not necessarily adjacent in the sorted
        # list (e.g. "d/a.txt", "d/sub/b.txt", "d/z.txt"), so every directory
        # already handled is remembered instead of only the previous one.
        list (FIND _seenDirs "${_dir}" _seenIndex)
        if (_seenIndex EQUAL -1)
            list (APPEND _firstMatched "${_path}")
            list (APPEND _seenDirs "${_dir}")
        endif()
    endforeach()
    # Quoted so that an empty result still defines the variable in the
    # caller's scope instead of unsetting it.
    set (${_resultVar} "${_firstMatched}" PARENT_SCOPE)
endfunction()

The function can be used in the following way:

# Select one .txt file per directory below "a" and report each selection.
glob_recurse_first("a/*.txt" _fileList)
foreach (_selected IN LISTS _fileList)
    message (STATUS "${_selected}")
endforeach()

The globbing expression "a/*.txt" matches all files with the extension .txt in directory a and all its sub-directories.

The result variable _fileList contains absolute paths with one file per sub-directory in a.

sakra
  • 62,199
  • 16
  • 168
  • 151
1
# Collect at most one *.txt file from every directory of the tree rooted at
# ${CMAKE_CURRENT_SOURCE_DIR}/a into resultfilelist, then print the selection.
set(resultfilelist "")
file(GLOB_RECURSE children LIST_DIRECTORIES true "${CMAKE_CURRENT_SOURCE_DIR}/a/*")
# GLOB_RECURSE only lists entries beneath the root, so the root directory is
# added explicitly; otherwise *.txt files directly in "a" are never considered.
list(APPEND children "${CMAKE_CURRENT_SOURCE_DIR}/a")
# Sort explicitly so the traversal order does not depend on the glob order.
list(SORT children)
foreach(child IN LISTS children)
    if(IS_DIRECTORY "${child}")
      file(GLOB txtfiles LIST_DIRECTORIES false "${child}/*.txt")
      # Sorting makes "first match" mean the same file on every run.
      list(SORT txtfiles)
      list(LENGTH txtfiles txtfilessize)
      if(txtfilessize GREATER 0)
        list(GET txtfiles 0 txtfilesfirst)
        list(APPEND resultfilelist "${txtfilesfirst}")
      endif()
    endif()
endforeach()

foreach(child IN LISTS resultfilelist)
 message("Text file is  ${child}")
endforeach()
Tania Chistyakova
  • 3,928
  • 6
  • 13
1

I am not sure, but you could create an executable script that searches the directories with a command such as

# Recursively grep for "version", then print the first colon-separated field
# (the file name in grep's "file:line" output) of each result line in which
# "CMake" is followed by a colon.
$ grep -R version | awk -F: '/CMake.*:/ {print $1}'

like this example

https://unix.stackexchange.com/a/105342

and you can save this result to a new file and include this file if you need further development

https://stackoverflow.com/a/17655165/914284

Hamit YILDIRIM
  • 4,224
  • 1
  • 32
  • 35