2

I accidentally typed await(await stat(content... and it worked. Is this valid syntax, or is there a better way to do it? I'm trying to list all the entries that are directories and do not match my regex.

// Setup shared by main() below: promisified fs helpers plus the paths and
// filter used for the directory scan.
const fs = require('fs')
const path = require('path')
// Absolute path of the current working directory with '/docs/' appended;
// main() prefixes each entry name with it before calling stat.
const content = path.resolve('.') + '/docs' + '/'
const util = require('util');
// Promise-returning wrappers around the callback-style fs.stat / fs.readdir.
const stat = util.promisify(fs.stat)
const readDir = util.promisify(fs.readdir)
// Directory whose entries are listed (relative path).
const directories = 'docs/';
// Case-insensitive: matches names that start with "adir" or ".somedir".
const exclude = new RegExp(/^(adir|\.somedir)/,'i');
// Not referenced anywhere in the code shown here.
let newFiles = {}

/**
 * Reads the entries of `directories`, keeps those that are directories and
 * whose names are not matched by `exclude`, and logs the resulting array.
 * Entries are stat-ed one at a time, in listing order.
 */
async function main(){
    const ls = await readDir(directories)
    console.log('starting....');
    let newArray = []
     for (let index = 0; index < ls.length; index++) {
               // Only the inner `await` (on stat) is needed: isDirectory() returns a
               // plain boolean, and awaiting a non-promise simply yields the value.
               let x =  await (await stat(content + ls[index])).isDirectory()
               let file = ls[index]
               // Keep the entry only if it is a directory and its name is not excluded.
               if (x && !(exclude.test(file))){newArray.push(file)}
               console.log('x is ',x);
        }   
    console.log('new filtered array: ', newArray);
}
ritchie
  • 405
  • 1
  • 4
  • 12
  • 1
    If it was invalid syntax, it wouldn't work. You can await non-promises; you just get the value. – jonrsharpe May 03 '21 at 16:42
  • is this a script or a part of some server code? If the former, it would be easier to use `...Sync` functions and forget about promises. – georg May 03 '21 at 17:57
  • @georg no it's not, thanks. I think CertainPerformance figured out a better way. – ritchie May 03 '21 at 18:01
  • 1
    In the async case, you don't need the `stat` call. See https://nodejs.org/api/fs.html#fs_fspromises_readdir_path_options, option `withFileTypes`. – georg May 03 '21 at 18:12
  • @georg looking into that ```const files = await readdir(directories,{withFileTypes:true});``` – ritchie May 03 '21 at 18:29

4 Answers4

2

ls

My advice would be not to put all of your eggs in one basket. We can write an ultra fast ls function using Node's fs.Dirent objects and bypass the need for a slow fs.stat call on each file -

// fsext.js

import { readdir } from "fs/promises"
import { join } from "path"

/**
 * Recursively walk a directory tree.
 *
 * Yields `{ dir }` for the starting path and for every directory found
 * beneath it, and `{ file }` for every entry that is not a directory.
 * Entry types come from the Dirent objects returned by `readdir`, so no
 * separate `stat` call is made per entry.
 *
 * @param {string} [path="."] directory to start from
 */
async function* ls (path = ".") {
  yield { dir: path }
  const entries = await readdir(path, { withFileTypes: true })
  for (const entry of entries) {
    const child = join(path, entry.name)
    if (!entry.isDirectory()) {
      yield { file: child }
      continue
    }
    // Descend: the nested call yields the { dir } record for `child` itself.
    yield* ls(child)
  }
}

/**
 * Drain an (async) iterable into an array.
 *
 * @param iter any value usable with `for await`
 * @returns {Promise<Array>} the yielded values, in iteration order
 */
async function toArray (iter) {
  const collected = []
  for await (const item of iter) {
    collected.push(item)
  }
  return collected
}

export { ls, toArray }
// main.js

import { ls, toArray } from "./fsext.js"

// Collect the whole listing, then print it (or the error if the walk fails).
const listing = toArray(ls("./node_modules"))
listing.then(console.log, console.error)

To test it out, let's add some popular npm packages so we have a large hierarchy to test our program. We'll install the lot and count the number of directories and files -

$ npm install async chalk commander debug express immutable lodash moment prop-types react react-dom request webpack

$ find ./node_modules | wc -l
5453

Now let's run our program and time it -

$ time node main.js
[
  { dir: './node_modules' },
  { dir: 'node_modules/.bin' },
  { file: 'node_modules/.bin/acorn' },
  { file: 'node_modules/.bin/browserslist' },
  { file: 'node_modules/.bin/loose-envify' },
  { file: 'node_modules/.bin/mime' },
  { file: 'node_modules/.bin/sshpk-conv' },
  { file: 'node_modules/.bin/sshpk-sign' },
  { file: 'node_modules/.bin/sshpk-verify' },
  { file: 'node_modules/.bin/terser' },
  { file: 'node_modules/.bin/uuid' },
  { file: 'node_modules/.bin/webpack' },
  { file: 'node_modules/.package-lock.json' },
  { dir: 'node_modules/@types' },
  { dir: 'node_modules/@types/eslint' },
  { file: 'node_modules/@types/eslint/LICENSE' },
  { file: 'node_modules/@types/eslint/README.md' },
  { file: 'node_modules/@types/eslint/helpers.d.ts' },
  { file: 'node_modules/@types/eslint/index.d.ts' },
  { dir: 'node_modules/@types/eslint/lib' },
   ... 5433 more items
]
node main.js  0.09s user 0.02s system 116% cpu 0.099 total

dirs

If we only want directories, we can write dirs as a simple specialization of our generic ls -

// fsext.js (continued)

/**
 * Yield only the directory paths produced by `ls(path)`.
 *
 * @param {string} path directory to start from (passed straight to `ls`)
 */
async function* dirs (path) {
  for await (const { dir } of ls(path)) {
    if (dir) {
      yield dir
    }
  }
}
$ find ./node_modules -type d | wc -l
457

Now compare it against our program

// main.js

import { dirs, toArray } from "./fsext.js"

// Collect every directory path, then print the list (or the error).
const directoryList = toArray(dirs("./node_modules"))
directoryList.then(console.log, console.error)
$ time node main.js
[
  './node_modules',
  'node_modules/.bin',
  'node_modules/@types',
  'node_modules/@types/eslint',
  'node_modules/@types/eslint/lib',
  'node_modules/@types/eslint/lib/rules',
  'node_modules/@types/eslint/rules',
  'node_modules/@types/eslint-scope',
  'node_modules/@types/estree',
  'node_modules/@types/json-schema',
  'node_modules/@types/node',
  'node_modules/@types/node/assert',
  'node_modules/@types/node/dns',
  'node_modules/@types/node/fs',
  'node_modules/@types/node/stream',
  'node_modules/@types/node/timers',
  'node_modules/@types/node/ts3.6',
  'node_modules/@webassemblyjs',
  'node_modules/@webassemblyjs/ast',
  'node_modules/@webassemblyjs/ast/esm',
  ... 437 more items
]
node main.js  0.09s user 0.02s system 108% cpu 0.099 total

exclude

If we want to exclude certain directories or files, we can write it generically as well -

// fsext.js (continued)

/**
 * Pass through the values of `iter` for which `test` returns a falsy result.
 *
 * @param iter any value usable with `for await`
 * @param {Function} test predicate; a truthy result drops the value
 */
async function* exclude (iter, test) {
  for await (const item of iter) {
    if (!test(item)) {
      yield item
    }
  }
}
// main.js

import { dirs, exclude, toArray } from "./fsext.js"

// Drop every directory path containing "@", then print what is left (or the error).
const containsAt = v => /@/.test(v)
const unscopedDirs = exclude(dirs("./node_modules"), containsAt)
toArray(unscopedDirs).then(console.log, console.error)

$ time node main.js
[
  './node_modules',
  'node_modules/.bin',
  'node_modules/accepts',
  'node_modules/acorn',
  'node_modules/acorn/bin',
  'node_modules/acorn/dist',
  'node_modules/ajv',
  'node_modules/ajv/dist',
  'node_modules/ajv/lib',
  'node_modules/ajv/lib/compile',
  'node_modules/ajv/lib/dot',
  'node_modules/ajv/lib/dotjs',
  'node_modules/ajv/lib/refs',
  'node_modules/ajv/scripts',
  'node_modules/ajv-keywords',
  'node_modules/ajv-keywords/keywords',
  'node_modules/ajv-keywords/keywords/dot',
  'node_modules/ajv-keywords/keywords/dotjs',
  'node_modules/ansi-styles',
  'node_modules/array-flatten',
  ... 351 more items
]
node main.js  0.09s user 0.02s system 105% cpu 0.104 total

reorganize

In our file system extensions module, fsext, we wrote two functions that work on any iterable, not just ls or dirs. I would suggest breaking these out into their own iter module. This type of reorganization helps decouple concerns and maximize code reuse throughout your entire program -

// iter.js

/** Async generator that finishes immediately without yielding anything. */
async function* empty () {
  return
}

/**
 * Pass through the values of `iter` for which `test` returns a falsy result.
 *
 * @param [iter=empty()] any value usable with `for await`
 * @param {Function} [test=Boolean] predicate; a truthy result drops the value
 */
async function* exclude (iter = empty(), test = Boolean) {
  for await (const item of iter) {
    if (!test(item)) {
      yield item
    }
  }
}

/**
 * Drain an (async) iterable into an array.
 *
 * @param [iter=empty()] any value usable with `for await`
 * @returns {Promise<Array>} the yielded values, in iteration order
 */
async function toArray (iter = empty()) {
  const collected = []
  for await (const item of iter) {
    collected.push(item)
  }
  return collected
}

export { empty, exclude, toArray }
// fsext.js

import { readdir } from "fs/promises"
import { join } from "path"

/**
 * Recursively walk a directory tree.
 *
 * Yields `{ dir }` for the starting path and for every directory found
 * beneath it, and `{ file }` for every entry that is not a directory.
 * Entry types come from the Dirent objects returned by `readdir`, so no
 * separate `stat` call is made per entry.
 *
 * @param {string} [path="."] directory to start from
 */
async function* ls (path = ".") {
  yield { dir: path }
  const entries = await readdir(path, { withFileTypes: true })
  for (const entry of entries) {
    const child = join(path, entry.name)
    if (!entry.isDirectory()) {
      yield { file: child }
      continue
    }
    // Descend: the nested call yields the { dir } record for `child` itself.
    yield* ls(child)
  }
}

/**
 * Yield only the directory paths produced by `ls(path)`.
 *
 * @param {string} path directory to start from (passed straight to `ls`)
 */
async function* dirs (path) {
  for await (const { dir } of ls(path)) {
    if (dir) {
      yield dir
    }
  }
}

/**
 * Yield only the file paths produced by `ls(path)`.
 *
 * @param {string} path directory to start from (passed straight to `ls`)
 */
async function* files (path) {
  for await (const { file } of ls(path)) {
    if (file) {
      yield file
    }
  }
}

export { ls, dirs, files }
// main.js

import { dirs } from "./fsext.js"
import { exclude, toArray } from "./iter.js"

// Template only: both values below are placeholders to fill in.
// somePath: the directory to scan.
const somePath = "..."
// someTest: a predicate; directories for which it returns a truthy value are left out.
const someTest = v => ...

// Collect the directories that pass the filter, then print them (or the error).
toArray(exclude(dirs(somePath), someTest))
  .then(console.log, console.error)

search

Looking for a specific file or folder? Read on in this Q&A to implement search.

Mulan
  • 129,518
  • 31
  • 228
  • 259
  • Thank you thank you, I'm going to have to check that out. I did post an answer right now but does not seem as sophisticated as yours. I like the yield. – ritchie May 03 '21 at 19:57
  • @ritchie I'm happy to help. If you have any more questions, please don't be shy :D – Mulan May 03 '21 at 21:12
  • Is there even a function named join() ? I'm trying to get your script to work. Had to change all the Imports to ```const { stat, readdir ,join} = require('fs').promises;``` I know there is path.join() . Oh and I tested mine with 7000 files, just a fraction of a second. I'm probably never going to have 7000 files in a directory lol. I'm working on testing yours. – ritchie May 04 '21 at 01:48
  • Ok I got it working now, that's pretty insane that you came up with that. It's not faster than mine tested with 7000 files but then again you're is recursive which mine is not. I don't require recursive for the website. In case you are wondering yours is: 079s,084,019s... while mine is 065s,040s,018s. – ritchie May 04 '21 at 02:02
  • `join` comes from the [path](https://nodejs.org/api/path.html#path_path_join_paths) module. i don't remember which version of node adds support for `import` but it's been around for awhile. to enable it, you have to add `{ "type": "module" }` to `package.json`. 7,000 files might seem like a lot, but this was only to demonstrate that `ls` is fast even when operating on large, deep file systems. i chose a recursive procedure is because file hierarchies are recursive in structure and it's easier to reason about a program that matches the shape of the data it operates upon. all the best :D – Mulan May 04 '21 at 02:22
  • yea I'm sure it will come in handy for a sitemap or something. I made it work with this ```const {join} = require('path')``` – ritchie May 04 '21 at 02:34
  • perfect. that's exactly how to include `path.join` using `require` – Mulan May 04 '21 at 02:37
1

isDirectory returns a boolean, not a Promise<boolean> so the second await is superfluous, you could just write (await stat(content + ls[index])).isDirectory()

Guerric P
  • 30,447
  • 6
  • 48
  • 86
  • Yea, you're right. That's interesting how this for loop works yet many have a hard time using loops with async even create blogs trying to figure out how to use a loop in async huh ? – ritchie May 03 '21 at 16:50
  • 1
    Well `await` works in for loops as well as in any code, it just uses Promise chaining under the hood. You should consider @CertainPerformance answer because your output array only depends on the corresponding file of every input file path, so you could parallelize the tasks and improve performance drastically – Guerric P May 03 '21 at 16:57
0

Since it works, the syntax is valid - but the code is confusing and probably shouldn't be used without some tweaks. The important thing to know here is that it's valid to await something that isn't a Promise. If the expression on the right is a Promise, the whole thing will resolve to the value of the resolved Promise; if the expression on the right is not a Promise, the whole expression will resolve to that value. That is:

await Promise.resolve(5)

is essentially the same as

await 5

but while the second works, it's confusing - better to only await things that are Promises. The isDirectory method of the fs.Stats object that stat resolves to doesn't return a Promise, so it'd be a good idea to remove the await from it.

There's also a better approach altogether for what you're doing: use Promise.all instead, so that all items in the directory can be searched through at once, instead of having to wait for them one-by-one. Your current code will take a long time if there are a whole lot of items in the directory - this is not necessary.

You can also simplify the regex by using a regex literal instead of new RegExp.

// Case-insensitive: matches names that start with "adir" or ".somedir".
const exclude = /^(?:adir|\.somedir)/i;

/**
 * Log the entries of `directories` that are directories and whose names are
 * not matched by `exclude`.
 *
 * All entries are stat-ed concurrently via Promise.all instead of one by one.
 * Rejects if reading the directory or any stat call fails.
 *
 * @returns {Promise<void>}
 */
async function main() {
    const filenames = await readDir(directories);
    const newArray = await Promise.all(
        filenames.map(async (filename) => {
            const fileStat = await stat(content + filename);
            // isDirectory is a method and must be called; the bare property is
            // a function reference and therefore always truthy.
            if (fileStat.isDirectory() && !exclude.test(filename)) {
                return filename;
            }
            // Entries that are filtered out map to undefined and are removed below.
            return undefined;
        })
    );
    const results = newArray.filter(Boolean);
    console.log('new filtered array: ', results);
}

You can also consider using fs.promises instead of util.promisify.

const { stat, readdir } = require('fs').promises;
CertainPerformance
  • 356,069
  • 52
  • 309
  • 320
  • promisify does return a promise. – ritchie May 03 '21 at 16:47
  • @ritchie `isDirectory` does not return a Promise, so there's no sense `await`ing it. Only use *one* `await`, for `stat`. – CertainPerformance May 03 '21 at 16:48
  • I meant to say stat . – ritchie May 03 '21 at 16:50
  • I got readDir is not a function, trying to fix that. Even changed it in the ```const { stat, readdir } = require('fs').promises;``` – ritchie May 03 '21 at 17:05
  • @ritchie Make sure to use consistent capitalization. The function name is `readdir`, not `readDir` – CertainPerformance May 03 '21 at 17:06
  • I used readdir in both I get refrence erro cannot access stat before initilization. – ritchie May 03 '21 at 17:10
  • @ritchie Make sure to import `stat` from `.promises` before doing the rest of your code. Put it at the top, not at the bottom. You can't access variables before defining them in JavaScript. – CertainPerformance May 03 '21 at 17:11
  • Sorry it's been a while since I have been in the javascript world. Can you explicitly tell me what I should import after ```const { stat, readdir } = require('fs').promises;``` – ritchie May 03 '21 at 17:41
  • The import order doesn't matter, just make sure you put the rest of the code eg `async function main()` *after* all the imports, since you can't reference variables before defining them. Just do `const { stat, readdir } = require('fs').promises;` before proceeding to use `stat` and `readdir`. – CertainPerformance May 03 '21 at 17:44
  • I think the problem was that you named stat and stat ``` const stat = await stat(content + filename);``` and even then the value of that is an annoymous function. – ritchie May 03 '21 at 17:59
-1

Much credit goes to georg and CertainPerformance. This is a simple solution.

const { stat, readdir } = require('fs').promises;

/**
 * List the names of the sub-directories of `directories` that are not
 * matched by `excludeDir`.
 *
 * Uses `readdir` with `withFileTypes: true`, so each entry already knows
 * whether it is a directory and no `stat` call is needed.
 *
 * @returns {Promise<string[]>} the matching directory names; an empty array
 *   if reading the directory fails (the error is logged, not rethrown)
 */
async function main() {
    try {
        const entries = await readdir(directories, { withFileTypes: true });
        // Return the result so callers can use it; previously it was computed
        // and then discarded.
        return entries
            .filter((entry) => entry.isDirectory() && !excludeDir.test(entry.name))
            .map((entry) => entry.name);
    } catch (err) {
        console.error(err);
        return [];
    }
}

ritchie
  • 405
  • 1
  • 4
  • 12