19

I've got a problem with this code in node.js. I want to recursively walk through a directory tree and apply the callback action to every file in the tree. This is my code at the moment:

var fs = require("fs");

// General function
var dive = function (dir, action) {
  // Assert that it's a function
  if (typeof action !== "function")
    action = function (error, file) { };

  // Read the directory
  fs.readdir(dir, function (err, list) {
    // Return the error if something went wrong
    if (err)
      return action(err);

    // For every file in the list
    list.forEach(function (file) {
      // Full path of that file
      path = dir + "/" + file;
      // Get the file's stats
      fs.stat(path, function (err, stat) {
        console.log(stat);
        // If the file is a directory
        if (stat && stat.isDirectory())
          // Dive into the directory
          dive(path, action);
        else
          // Call the action
          action(null, path);
      });
    });
  });
};

The problem is that in the for each loop stat is called for every file via the variable path. When the callback is called, path already has another value and so it dives into the wrong directories or calls the action for the wrong files.

Probably this problem could easily get solved by using fs.statSync, but this is not the solution I would prefer, since it is blocking the process.

tshepang
  • 12,111
  • 21
  • 91
  • 136
pvorb
  • 7,157
  • 7
  • 47
  • 74

7 Answers7

16

var path = dir + "/" + file;

You forgot to make path a local variable. Now it won't be changed behind your back in the loop.

Raynos
  • 166,823
  • 56
  • 351
  • 396
  • 7
    This would have been caught by putting `"use strict";` at the top of your file. – Domenic Apr 18 '12 at 13:09
  • 6
    For cross-platform compatibility, use node's [path.join](http://nodejs.org/api/path.html#path_path_join_path1_path2) function rather than appending a raw `"/"`. – Matt Mc Nov 05 '14 at 04:47
12

Use node-dir for this. Because you need a separate action for directories and files, I'll give you 2 simple iterators using node-dir.

Asynchronously iterate the files of a directory and its subdirectories and pass an array of file paths to a callback.

var dir = require('node-dir');

dir.files(__dirname, function(err, files) {
  if (err) throw err;
  console.log(files);
  //we have an array of files now, so now we'll iterate that array
  files.forEach(function(filepath) {
    actionOnFile(null, filepath);
  })
});

Asynchronously iterate the subdirectories of a directory and its subdirectories and pass an array of directory paths to a callback.

var dir = require('node-dir');

dir.subdirs(__dirname, function(err, subdirs) {
  if (err) throw err;
  console.log(subdirs);
  //we have an array of subdirs now, so now we'll iterate that array
  subdirs.forEach(function(filepath) {
    actionOnDir(null, filepath);
  })
});
unsynchronized
  • 4,828
  • 2
  • 31
  • 43
Christiaan Westerbeek
  • 10,619
  • 13
  • 64
  • 89
9

Another suitable library is filehound. It supports file filtering (if required), callbacks and promises.

For example:

const Filehound = require('filehound');

function action(file) {
  console.log(`process ${file}`)
}

Filehound.create()
.find((err, files) => {
    if (err) {
        return console.error(`error: ${err}`);
    }

    files.forEach(action);
});

The library is well documented and provides numerous examples of common use cases. https://github.com/nspragg/filehound

Disclaimer: I'm the author.

nickool
  • 834
  • 11
  • 11
7

Not sure if I should really post this as an answer, but for your convenience and other users, here is a rewritten version of OP's which might prove useful. It provides:

  • Better error management support
  • A global completion callback which is called when the exploration is complete

The code:

/**
 * dir: path to the directory to explore
 * action(file, stat): called on each file or until an error occurs. file: path to the file. stat: stat of the file (retrived by fs.stat)
 * done(err): called one time when the process is complete. err is undifined is everything was ok. the error that stopped the process otherwise
 */
var walk = function(dir, action, done) {

    // this flag will indicate if an error occured (in this case we don't want to go on walking the tree)
    var dead = false;

    // this flag will store the number of pending async operations
    var pending = 0;

    var fail = function(err) {
        if(!dead) {
            dead = true;
            done(err);
        }
    };

    var checkSuccess = function() {
        if(!dead && pending == 0) {
            done();
        }
    };

    var performAction = function(file, stat) {
        if(!dead) {
            try {
                action(file, stat);
            }
            catch(error) {
                fail(error);
            }
        }
    };

    // this function will recursively explore one directory in the context defined by the variables above
    var dive = function(dir) {
        pending++; // async operation starting after this line
        fs.readdir(dir, function(err, list) {
            if(!dead) { // if we are already dead, we don't do anything
                if (err) {
                    fail(err); // if an error occured, let's fail
                }
                else { // iterate over the files
                    list.forEach(function(file) {
                        if(!dead) { // if we are already dead, we don't do anything
                            var path = dir + "/" + file;
                            pending++; // async operation starting after this line
                            fs.stat(path, function(err, stat) {
                                if(!dead) { // if we are already dead, we don't do anything
                                    if (err) {
                                        fail(err); // if an error occured, let's fail
                                    }
                                    else {
                                        if (stat && stat.isDirectory()) {
                                            dive(path); // it's a directory, let's explore recursively
                                        }
                                        else {
                                            performAction(path, stat); // it's not a directory, just perform the action
                                        }
                                        pending--; checkSuccess(); // async operation complete
                                    }
                                }
                            });
                        }
                    });
                    pending--; checkSuccess(); // async operation complete
                }
            }
        });
    };

    // start exploration
    dive(dir);
};
hippietrail
  • 15,848
  • 18
  • 99
  • 158
Samuel Rossille
  • 18,940
  • 18
  • 62
  • 90
  • This is great! One small optimization: you don't need the checkSuccess after the first pending--. You can't ever be at pending==0 unless you've completed the directory that you're currently in. – Tristan Reid May 20 '15 at 13:17
2

Don't reinvent the wheel - use and contribute to open source instead. Try one of the following:

B T
  • 57,525
  • 34
  • 189
  • 207
  • 4
    I don't like that module. [This one](https://npmjs.org/package/dive) (which was the result of this question) has a much simpler api. – pvorb Aug 28 '13 at 13:42
1

There is an NPM module for this:

npm dree

Example:

const dree = require('dree');
const options = {
    depth: 5,                        // To stop after 5 directory levels
    exclude: /dir_to_exclude/,       // To exclude some pahts with a regexp
    extensions: [ 'txt', 'jpg' ]     // To include only some extensions
};

const fileCallback = function (file) {
    action(file.path);
};

let tree;

// Doing it synchronously
tree = dree.scan('./dir', options, fileCallback);

// Doing it asynchronously (returns promise)
tree = await dree.scanAsync('./dir', options, fileCallback);

// Here tree contains an object representing the whole directory tree (filtered with options)
EuberDeveloper
  • 874
  • 1
  • 14
  • 38
-2
function loop( ) {
    var item = list.shift( );
    if ( item ) {
        // content of the loop
        functionWithCallback( loop );
    } else {
        // after the loop has ended
        whatever( );
    }
}
xavierm02
  • 8,457
  • 1
  • 20
  • 24
  • It doesn't become clear what you want me to do in `functionWithCallback(loop);`. Could you apply your solution to my example? – pvorb Aug 12 '11 at 15:06
  • Well for each element of the list you want to do something and this function does that and then calls the callback. I'll edit tomorow to make it closer to your code. – xavierm02 Aug 12 '11 at 20:57