0

I'm trying to write a simple recursive file walker using node's fs API and am making a basic error with callbacks, but am having a hard time seeing it.

the final return the_files; executes and returns an empty array [] instead of one filled with the file objects

the debug statement does log a populated array to the console with Dirent objects in the array.

the testdir structure is:

/testdir/a.txt
/testdir/b.txt
/testdir/c.txt
/testdir/somedir/d.txt

what am i missing?

function getTheFiles(mydir){
    let the_files = [];
    
    fs.readdir(mydir, {withFileTypes: true}, function (err, files){

        if (err){
            return console.log('Unable to scan directory: ' + err);
        }

        files.forEach(function (file) {
            if (file.name[0] === '.') return; //skip dotfiles
            if (file.isDirectory()){
                getTheFiles(path.resolve(mydir, file.name));
            }
            if (file.isFile()){
                the_files.push(file);
                console.log(the_files); //debug
            }
        })
    })
    return the_files;
}
//output: []

I've gone through some excellent SO answers and blogs several times, but have not been able to massage my code into the right format:

Ref 1: Why is my variable unaltered after I modify it inside of a function? - Asynchronous code reference

Ref 2: Get data from fs.readFile

Ref 3: forEach does not play well with async

Ref 4: Why is my variable unaltered after I modify it inside of a function? - Asynchronous code reference

Ref 5: How do you get a list of the names of all files present in a directory in Node.js? - doesn't help you do things with the result of the dir walk

Here's another attempt where I tried calling a function with the result of the async readdir -

function async_ver_getTheFiles(dir){
    fs.readdir(dir, {withFileTypes: true}, function read(err, files){
        if (err){
            throw err;
        }
        filePacker(files);
    });

}

function filePacker(files){
    let the_files = [];
    for (const file of files){
        if (file.name[0] === '.') continue; //skip dotfiles
        if (file.isFile()){
            the_files.push(file.name);
        }
        if (file.isDirectory()){
            async_ver_getTheFiles(file.name);
        }
    }
    console.log(the_files); //returns the files
    return the_files;
}

var result = async_ver_getTheFiles(dir);
console.log(result); //returns undefined
//output: returns the undefined result before printing the correct result from within filePacker()

I do have a sync version working fine:

function getTheFiles(dir){
    let the_files = [];
    let files = [];

    try {
        files = fs.readdirSync(dir, {withFileTypes: true});
    } 
    catch (error) {
        console.error('failed calling fsreaddirSync(dir, {withFileTypes: true} on value dir: \'' + dir + '\'\n');
        console.error(error);
    }

    for (const file of files){
        if (file.name[0] === '.') return; //skip dotfiles
        if (file.isFile()){
            let absolutePath = path.resolve(dir, file.name);
            the_files.push(absolutePath);
        }
        else if (file.isDirectory()){
            the_files.push(getTheFiles(path.resolve(dir, file.name)).toString()); //should be the_files.push(...getTheFiles(blah)) instead - see accepted answer
        }
}
    return the_files;
}
//output:
<path>/a.txt
<path>/b.txt
<path>/c.txt
<path>/somedir/d.txt
phoenixdown
  • 828
  • 1
  • 10
  • 16

2 Answers2

1

Maybe the whole thing gets a bit easier if you switch from the callback form of the fs API to the promise based version of the API. Ie, instead of

const fs = require('fs');

use

const fs = require('fs').promises;

This way, your fs function won't have a signature like

fs.readdir(dir, (err, files) => {})

anymore but will look like the following

fs.readdir(dir) : Promise<string[]>

and you can use them like the following

let files = await fs.readdir("/foo/bar");

This way, you can (more or less) take your synchronous approach, and everywhere, where you now have a

let baz = fs.foobarSync(...)

you can then use

let baz = await fs.foobar();

Thus your code would be like (see also the comments in the code)

// require the promise based API
let fs = require('fs').promises;

// be sure to make this function async
// so you can use "await" inside of it
async function getTheFiles(dir){
  let the_files = [];
  let files = [];

  try {
   files = await fs.readdir(dir, {withFileTypes: true});
  } 
  catch (error) {
    console.error('...');
    console.error(error);
  }

  for (const file of files){
    if (file.name[0] === '.') continue; // <-- use continue here instead of return to skip this file and check also the remaining files
    
    if (file.isFile()){
      let absolutePath = path.resolve(dir, file.name);
      the_files.push(absolutePath);
    }
    else if (file.isDirectory()){
      // get all files from the subdirectory
      let subdirfiles = await getTheFiles(path.resolve(dir, file.name))

      // as getTheFiles returns an array
      // you can use the spread syntax to push all
      // elements of that array intto the_files array
      the_files.push(...subdirfiles);
    }
  }
  return the_files;
}


getTheFiles("/foo/bar").then(files => {
  console.log(files);
})
.catch(e => {
  console.log(e);
})
derpirscher
  • 14,418
  • 3
  • 18
  • 35
  • that spread syntax solved a problem i didn't realize i had when i originally posted! thank you. for the use of `continue`, as i understand it this is expected when using `for/of` because it is an iterator (so the code is correct, but for a different reason than the comment suggests), but `return` is required when using `.forEach()` since `.forEach()` spawns a separate function for each item in the array -- does this match your understanding @derpirscher ? – phoenixdown Aug 30 '21 at 22:07
  • 1
    Yes. If you use `forEach()` you'd use `return` to return from the callback function. But as the `for .. of` loop lives within the same function, a `return` here would immediately terminate the function. Thus use `continue` to just skip this element of the array and continue with the next one. You can't use `forEach` with `async/await` ... – derpirscher Aug 30 '21 at 22:10
  • this works! i think i'm having trouble following something though. when i implement as you've written it works. however if i do `var result = getTheFiles(dir).then(files => { return files }); console.log(result);` i get `Promise { }`-- do you know why this is? is it just impossible to take the value of an asynch function and store it outside the function chain? – phoenixdown Aug 30 '21 at 22:34
  • 1
    `.then(...)` returns a promise. And thus `result` holds that promise (and not its result). And this is what you see when you log out a promise to the console – derpirscher Aug 30 '21 at 22:42
  • ah that makes sense! thank you. i still end up getting undefined if i try to account for that, e.g. with: ```var result; getTheFiles(dir).then(files => { result = files }); console.log(result);``` and now that i type it out...if i understand correctly this is because the `console.log(result)` has executed before the Promise has been fulfilled. is it reasonable to say that Promises are like a way of defining a DAG (directed acyclic graph) for code execution with javascript functions that may take some time to complete? – phoenixdown Aug 30 '21 at 22:44
  • 1
    Of course, because you are executing the `console.log` before the promise resolved (ie it's async ...) Read some tutorials about async programming and eventloops in JavaScript. – derpirscher Aug 30 '21 at 22:47
-1

There are multiple ways to do this, in all of these the function that calls getTheFiles won't have the result immediately (synchronously), but will have to wait and react when the result is ready.

  1. With a callback:
function getTheFiles(mydir, cb){
    let the_files = [];
    
    fs.readdir(mydir, {withFileTypes: true}, function (err, files){

        if (err){
            return console.log('Unable to scan directory: ' + err);
        }

        files.forEach(function (file) {
            if (file.name[0] === '.') return; //skip dotfiles
            if (file.isDirectory()){
                getTheFiles(path.resolve(mydir, file.name));
            }
            if (file.isFile()){
                the_files.push(file);
                console.log(the_files); //debug
            }
        })
        // pass the files to the callback
        cb(the_files)
    })
}

getTheFiles('some_path', files => {
  // use files here
})
  1. With a Promise:
function getTheFiles(mydir){
    let the_files = [];
    
    // return a Promsie
    return new Promise((resolve, reject) => {
      fs.readdir(mydir, {withFileTypes: true}, function (err, files){

          if (err){
              // reject on error
              reject('Unable to scan directory: ' + err)
          }

          files.forEach(function (file) {
              if (file.name[0] === '.') return; //skip dotfiles
              if (file.isDirectory()){
                  getTheFiles(path.resolve(mydir, file.name));
              }
              if (file.isFile()){
                  the_files.push(file);
                  console.log(the_files); //debug
              }
          })
          // resolve the promise
          resolve(the_files)
      })
    })
}

getTheFiles('some_path').then(files => {
  // use files here
})
  1. With a Promise and await:
function getTheFiles(mydir){
    let the_files = [];
    
    // return a Promsie
    return new Promise((resolve, reject) => {
      fs.readdir(mydir, {withFileTypes: true}, function (err, files){

          if (err){
              // reject on error
              reject('Unable to scan directory: ' + err)
          }

          files.forEach(function (file) {
              if (file.name[0] === '.') return; //skip dotfiles
              if (file.isDirectory()){
                  getTheFiles(path.resolve(mydir, file.name));
              }
              if (file.isFile()){
                  the_files.push(file);
                  console.log(the_files); //debug
              }
          })
          // resolve the promise
          resolve(the_files)
      })
    })
}

async function someAsyncFunction() {
   // wait until getTheFiles finished (but don't block other code from executing as sync would do)
   const files = await getTheFiles('some_path')
   // use files here
}

I think fs supports promises also natively with fs.promises or something similar, but I didn't look that up now. May be a good solution too.

A_A
  • 1,832
  • 2
  • 11
  • 17
  • If you propose using promises, why don't you use the promise base version of the `fs` API, which is part of nodejs since version 10.24, instead of wrapping the callbacks into promises yourself? – derpirscher Aug 30 '21 at 21:58
  • Furthermore, you forgot to `await` the recursive call of `getTheFiles` and collect its result, so the results for all of the subdirectories will get lost ... – derpirscher Aug 30 '21 at 22:01
  • And in the callback based version, you are not passing any callback to the recusive call of `getTheFiles` ... – derpirscher Aug 30 '21 at 22:15
  • Oh, I didn't see that it is recursive. I've used the callback API, because the OP used it and hence can compare his version against my version (which imo is more useful to learn a new concept like async behaviour). – A_A Aug 31 '21 at 06:27