
How can I optimize the following Node.js code to work asynchronously and be fast? I imagine the stack is getting too big, which is causing the program to slow down and eventually crash. Is there a better way to resolve a large number of domain names than the example below?

const dns = require('dns');
const os = require('os');

var logical_cores = os.cpus().length;
console.log(`Logical cores: ${logical_cores}`);
process.env.UV_THREADPOOL_SIZE=logical_cores;

const domainToIp_resolve = (domain) => {
  return new Promise((resolve, reject) => {
    // does not use the libuv thread pool; uses the OS's async polling
    dns.resolve(domain, 'A', (err, records) => {
      if (err) {
        reject(`${String(domain).toUpperCase()} | ERROR CODE: ${err.code} |  ERROR MESSAGE: ${err.message}`);
      } else {
        resolve(`${domain}: records: ${records}`);
      }
    });
  });
}

// simulating getting domain names from a list
function* getDomains(num) {
  let cnt = 0;

  for (let ctr = 1; ctr < num+1; ctr++) {

    // 97 = "a"; 123 = "z"
    for (let charCode = 97; charCode < 123; charCode++) {

      let prefix = '';
      for (let p = 0; p < ctr; p++) {
        prefix += `${String.fromCharCode(charCode)}`;
      }

      if (cnt === num) {
        return null;
      } else {
        cnt += 1;
        let domain = `${prefix}.com`;
        yield domain;
      }
    }
  }
}

function resolveDomain(num) {
  let doms = getDomains(num);

  function resolve(cnt, domain) {
    if (cnt < 1) return;
    if (cnt === num) return;

    domainToIp_resolve(domain)
      .then((data) => {
        console.log(data);
      })
      .catch((err) => {
        console.log(err);
      });

    setImmediate(resolve.bind(null, cnt+1, doms.next().value));
  }

  resolve(1, doms.next().value);
}

resolveDomain(1000000);
Gary
  • FYI, I don't think it does any good to set `process.env.UV_THREADPOOL_SIZE=logical_cores` after nodejs has loaded. I think that environment variable is read at startup time and the thread pool is built then. You should set that environment variable before you start your nodejs program. – jfriend00 Dec 11 '20 at 00:54
  • Your main problem is that you're attempting to run 1,000,000 DNS lookups in parallel (all at once). You will, of course, run out of memory and/or some other system resource trying to do that. You need to run only N requests in parallel at once where N is probably some smallish number which you can experiment with to get best performance. I would experiment with values starting at the number of cores you have and going up to 30 and see where the performance sweet spot is. – jfriend00 Dec 11 '20 at 00:57
    I'd suggest you look at using something like `mapConcurrent()` in [this answer](https://stackoverflow.com/questions/46654265/promise-all-consumes-all-my-ram/46654592#46654592). – jfriend00 Dec 11 '20 at 00:58

1 Answer


As mentioned in the comments, the problem is not that your program is not fast. The problem is that it is too fast: you're trying to open 1 million sockets in parallel. Even if your OS allows it, your router would not like it; from my experience downloading torrents, a consumer-grade home router would probably just crash under that load.

We can fix it by using async/await. First step: make one request at a time, the way you would in other languages such as PHP or Java:

async function resolveDomain(num) { // mark as async so we can use await
  let doms = getDomains(num);

  for (const domain of doms) {
    try {
      let data = await domainToIp_resolve(domain);
      console.log(data);
    }
    catch(err) {
      console.log(err);
    }
  }
}

This should work, but it would be slow. Now we can make it faster by batching requests (I'm doing 20 in parallel here, but you can fine-tune this number):

async function resolveDomain(num) { // mark as async so we can use await
  let doms = getDomains(num);
  let count = 0;
  const BATCH_SIZE = 20;

  while (count < num) {
    try {
      let promises = [];
      for (let i = 0; i < BATCH_SIZE && count < num; i++, count++) {
        promises.push(domainToIp_resolve(doms.next().value));
      }

      let result = await Promise.all(promises);
      for (let i = 0; i < result.length; i++) {
        console.log(result[i]);
      }
    }
    catch(err) {
      console.log(err);
    }
  }
}

It's not strictly necessary to use async/await. You can rewrite the above logic using callbacks but it gets complicated. For logic like this it is just easier to use async/await.
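
To give a feel for that, here is a rough sketch of what a callback-based, batched version could look like (it reuses dns and getDomains from your code; the helper name resolveBatch is purely illustrative):

function resolveDomainWithCallbacks(num) {
  const doms = getDomains(num);
  const BATCH_SIZE = 20;
  let count = 0;

  function resolveBatch() {
    if (count >= num) return; // all batches dispatched

    let pending = 0;
    for (let i = 0; i < BATCH_SIZE && count < num; i++, count++) {
      pending++;
      dns.resolve(doms.next().value, 'A', (err, records) => {
        if (err) console.log(`${err.code}: ${err.message}`);
        else console.log(records);
        // only start the next batch once every callback in this batch has fired
        if (--pending === 0) resolveBatch();
      });
    }
  }

  resolveBatch();
}

You have to count the pending callbacks yourself and decide when to kick off the next batch, which is exactly the bookkeeping that await Promise.all() does for you.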

Note that if we don't care about logging the results in the order of the requests, we can optimize the code further by starting a new request before the entire batch completes. I leave that as homework (hint: the async-q library has an implementation that you can use, and it even returns results in order).
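
If you want to try it without a library, one way to sketch the idea is to start a fixed number of workers that all pull domains from the same generator (the LIMIT of 20 here is just a starting point, like BATCH_SIZE above):

async function resolveDomainPooled(num) {
  const doms = getDomains(num);
  const LIMIT = 20;

  // each worker keeps one lookup in flight and pulls the next domain
  // as soon as the current one settles
  async function worker() {
    for (let next = doms.next(); !next.done; next = doms.next()) {
      try {
        console.log(await domainToIp_resolve(next.value));
      } catch (err) {
        console.log(err);
      }
    }
  }

  // LIMIT workers share one generator, so at most LIMIT lookups run at once;
  // results are logged as they complete, not in request order
  await Promise.all(Array.from({ length: LIMIT }, worker));
}

Because every worker starts its next lookup as soon as its current one settles, there are always up to LIMIT requests in flight, which is the "N requests in parallel" approach suggested in the comments.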

slebetman
  • Do you recommend a while loop? Should I incorporate setImmediate to ensure a non-blocking event loop? – Gary Dec 11 '20 at 13:49
  • The `await` keyword will pass control back to the interpreter which does what you intended with `setImmediate()`. So when using async/await there is no need to call `setImmediate()`. In any case even in your original code `dns.resolve()` is asynchronous and does not block the thread. So even with your original code there is no need to call `setImmediate`. DNS resolution is synchronous in most OSes but node uses a separate thread for DNS resolution in order to not block the main thread. It is one of the few things node does in a separate thread – slebetman Dec 11 '20 at 14:43