Python File Traversal

Question

I am trying to create a function that takes in the name of a root directory, traverses through that directory, and returns a nested list like this:

[["folder1",[
    ["subfolder1",[
        "file1",
        "file2"
    ]],
    ["subfolder2",[
        "file3",
        "file4"
    ]]
]],"file5","file6"]

Below is my attempt at the function:

def traverse(rootdir):
    """Attempt to list rootdir as nested [dirname, children] pairs plus file paths.

    NOTE: this version produces duplicate entries, because it recurses in two
    ways at once (see the comment on the os.walk loop below).
    """
    names = []
    # os.walk already descends into every sub-directory by itself, yielding one
    # (cdirname, dirnames, filenames) triple per directory. Combined with the
    # explicit recursive traverse() call below, each nested directory is
    # therefore processed more than once.
    for cdirname, dirnames, filenames in os.walk(rootdir):
        # record path to all subdirectories first.
        for subdirname in dirnames:
            names.append([subdirname,traverse(os.path.join(cdirname, subdirname))])

        # record path to all filenames.
        for filename in filenames:
            # Joining with cdirname is what puts the directory prefix in front
            # of every recorded file name.
            names.append(os.path.join(cdirname, filename))

    return names

My problem is that I always end up getting duplicates of the same files/folders recorded by the function, and that the paths are always shown relative to the "rootdir" instead of just the names of the respective file/folder. How do I weed out the duplicates? Additionally, how could I make it so that it's not the full path that gets recorded?

If you do `filename` instead of `os.path.join(cdirname, filename)` then you don't get the full path. — SethMMorton, Jun 19 '13 at 06:24
Thank you, that should have been obvious to me! Don't know why I didn't see that! — Abram I, Jun 19 '13 at 22:07

falsetru · Accepted Answer · 2013-06-19T07:11:27.220

`sorted` is used to make directories come first. If you don't care about the order, just return `names`.

def traverse(rootdir):
    """Return the contents of *rootdir* as a nested list of names.

    Every sub-directory is represented as ``[name, children]``, where
    ``children`` is the result of traversing that sub-directory; every other
    entry is represented by its bare name (no path prefix).

    The returned list has all directories first, then all files, each group
    sorted by name.
    """
    names = []
    for filename in os.listdir(rootdir):
        filepath = os.path.join(rootdir, filename)
        if os.path.isdir(filepath):
            names.append([filename, traverse(filepath)])
        else:
            names.append(filename)
    # Directories are lists and files are strings: the leading 0/1 in the key
    # puts directories first, and the second element orders each group by name.
    return sorted(names, key=lambda x: (0, x[0]) if isinstance(x, list) else (1, x))

Another version that uses os.walk:

def traverse(rootdir):
    """Build a nested name list for *rootdir* from a single os.walk pass.

    Sub-directories appear as ``[name, children]`` and files as bare names.
    Within each directory the sub-directories come first, then the files,
    both in sorted order.
    """
    tree = []
    # Maps each directory path to the list that collects its children, so the
    # list created while visiting the parent is filled in when os.walk later
    # reaches the directory itself.
    children_of = {rootdir: tree}
    for current, subdirs, files in os.walk(rootdir):
        bucket = children_of[current]
        for sub in sorted(subdirs):
            nested = []
            children_of[os.path.join(current, sub)] = nested
            bucket.append([sub, nested])
        bucket.extend(sorted(files))
    return tree

score 0 · Answer 2 · answered Jun 19 '13 at 06:47

You could use os.walk() to get all subdirs and subfiles. For each directory it visits, it yields a "triple" of ('current path', [subdirs], [subfiles]). But that didn't work for my needs, so I coded the following script. Hope this helps.

It creates an object for each folder, containing that folder's files and dirs, and sorts them alphabetically. I looked at os.walk and how it works, and this is a similar approach (with isdir()). The tab variable only controls the indentation, to make the output look better.

import os


class Folder():
    """Print an indented, alphabetically sorted tree of a directory.

    Usage: create a Folder for the start directory, call sorter() to read its
    entries, then list_stuff() to print them and recurse into sub-folders.

    Class attributes:
        prohibited_dirs: directory names skipped on every level.
        prohibited_files: file names skipped on every level.
        tab: indentation depth shared by all instances. It is incremented
            whenever a Folder is created and decremented by list_stuff()
            once a sub-folder has been printed.
    """
    # Names of folders/files to leave out on any level
    prohibited_dirs = set()
    prohibited_files = set()
    tab = 0

    def __init__(self, path, folder_name):
        """ path should be /home/example, folder_name: example """
        self.path = path
        self.folder_name = folder_name
        self.sub_dirs = []
        self.sub_files = []
        # Each new Folder sits one level deeper than the one that created it.
        self.__class__.tab += 1

    def sorter(self):
        """ Split the listdir output of self.path into folders and files. """
        # Start from empty lists so that calling sorter() again re-reads the
        # directory instead of appending every entry a second time.
        self.sub_dirs = []
        self.sub_files = []
        for name in os.listdir(self.path):
            if os.path.isdir(os.path.join(self.path, name)):
                self.sub_dirs.append(name)
            else:
                self.sub_files.append(name)

    def list_stuff(self):
        """ Sort both lists, then print all sub-folders (recursively) and files. """
        # Sort alphabetically, ignoring case
        self.sub_dirs.sort(key=str.lower)
        self.sub_files.sort(key=str.lower)
        # An empty list simply skips the loop, which ends the recursion.
        for sub_dir in self.sub_dirs:
            if sub_dir in self.__class__.prohibited_dirs:
                continue
            # Parenthesised single-argument print works on Python 2 and 3.
            print("\t" * self.tab + sub_dir)
            # Go deeper (creating the Folder bumps the shared tab counter)
            deeper = Folder(os.path.join(self.path, sub_dir), sub_dir)
            try:
                deeper.sorter()
                deeper.list_stuff()
            finally:
                # Restore the indentation level even if listing the sub-folder
                # failed, so the class-wide counter does not drift.
                self.__class__.tab -= 1
        for sub_file in self.sub_files:
            if sub_file in self.__class__.prohibited_files:
                continue
            print("\t" * self.tab + sub_file)

# Directory to list, and the name given to its root Folder object.
STARTDIRECTORY = STARTFOLDER = "."

# Read the start directory, then print its tree.
runner = Folder(path=STARTDIRECTORY, folder_name=STARTFOLDER)
runner.sorter()
runner.list_stuff()

Python File Traversal

2 Answers