2

I'm attempting to use nightmarejs to scrape information from a few websites. The problem that I'm running into is, I only want to open one window at a time and wait for it to close before processing the next url.

// index.js

// Placeholder from the question: the actual list of URLs is not shown here.
var urls = // an array of urls.

// Fixed iteration count; the trailing comment suggests urls.length is what is meant.
var l = 10; // urls.length;
while (l--) {
  // g.findById(id).then()....
  // y.findById(id).then()....

  // The loop never waits on the promise returned by findById: every iteration
  // calls it back-to-back, and the `.then` callbacks can only run after the
  // loop itself has finished.
  // NOTE(review): `id` is not defined anywhere in this snippet — presumably it
  // should be taken from `urls`; confirm.
  UrlProcessing.findById(id).then(function(results) {
    console.log(results);
  });
}

Now the findById:

//UrlProcessing.js

/**
 * Loads pages with Nightmare and hands back their markup.
 */
class UrlProcessing {
  /**
   * Creates a new Nightmare instance, navigates to the given address, waits,
   * and reads the innerHTML of the page's <html> element.
   *
   * Declared static so that `UrlProcessing.findById(...)` can be called on the
   * class itself, without constructing an instance.
   *
   * @param {string} id - Passed unchanged to `nightmare.goto()`, i.e. used as the address to load.
   * @returns {Promise<string>} Resolves with the markup returned by `evaluate`;
   *   rejects with the error reported to the vo callback.
   */
  static findById(id) {
    return new Promise(function (resolve, reject) {
      vo(function* (address) {
        const nightmare = new Nightmare({ show: true });
        try {
          // Returning the yielded value directly: it is already the final
          // result and must not be yielded a second time.
          return yield nightmare
            .goto(address)
            .wait(2000)
            .evaluate(function () {
              return document.getElementsByTagName('html')[0].innerHTML;
            });
        } finally {
          // End the instance on failure as well as on success, so a failed
          // navigation does not leave it running.
          yield nightmare.end();
        }
      })(id, function (err, res) {
        if (err) {
          reject(err);
          return;
        }
        resolve(res);
      });
    });
  }

  /**
   * Instance-level form of the same lookup, kept so that code holding a
   * `UrlProcessing` object can still call `findById` on it.
   *
   * @param {string} id - Address to load.
   * @returns {Promise<string>} Same promise as the static `findById`.
   */
  findById(id) {
    return UrlProcessing.findById(id);
  }
}

// The export has to sit outside the class body; inside it is a syntax error.
module.exports = UrlProcessing;

Any suggestions on how I can achieve this? I want to perform each findById from within the while loop.

Mithrilhall
  • 1,485
  • 8
  • 33
  • 52
  • Possible duplicate of [Resolve promises one after another (i.e. in sequence)?](http://stackoverflow.com/questions/24586110/resolve-promises-one-after-another-i-e-in-sequence) – mido Feb 16 '16 at 02:16

2 Answers

3

Without modifying findById, you can simulate series- or waterfall-like behavior using reduce:

var urls = ['http://www.yahoo.com', 'http://example.com', 'http://w3c.org'];

// Fold the URL list into a single promise chain seeded with an empty array.
// Each step waits for the chain built so far before calling findById for its
// own URL, then appends that result to the shared array.
urls
  .reduce(
    (chain, url) =>
      chain.then((pages) =>
        findById(url).then((page) => {
          pages.push(page);
          return pages;
        })
      ),
    Promise.resolve([])
  )
  .then((results) => {
    //do what you need to do with the results
  });

For completeness' sake, and because I had to make a couple of touchups, the findById method with my (slight) modifications:

/**
 * Loads one address in a new Nightmare instance and returns the page markup.
 *
 * @param {string} address - Address handed to `nightmare.goto()`.
 * @returns {Promise<string>} Resolves with the innerHTML of the page's <html>
 *   element; rejects with the error reported to the vo callback.
 */
function findById(address) {
  return new Promise(function(resolve, reject) {
    vo(function * (address) {
      const nightmare = new Nightmare({
        show: true
      });
      try {
        return yield nightmare
          .goto(address)
          .wait(2000)
          .evaluate(function() {
            return document.getElementsByTagName('html')[0].innerHTML;
          });
      } finally {
        // End the instance whether the steps above succeeded or threw, so a
        // failed navigation does not leave it running.
        yield nightmare.end();
      }
    })(address, function(err, res) {
      if (err) {
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}

... all that being said, I'm not sure this approach is best. Why do you want them one at a time in separate Nightmare instances? I realize this doesn't totally fit your original implementation, but this may be something you want to consider - you could change findById around to accept an array instead of a single URL and also (optionally) use the same Nightmare instance. Calling findById:

var urls = ['http://www.yahoo.com', 'http://example.com', 'http://w3c.org'];

// Hand the whole list over in one call; the returned promise delivers the
// results to the handler.
findById(urls).then((results) => {
  //do what you need to do with the results
});

... and findById itself:

/**
 * Loads each address in turn using a single shared Nightmare instance and
 * collects the markup of every page.
 *
 * @param {string[]} addresses - Addresses handed to `nightmare.goto()`, in order.
 * @returns {Promise<string[]>} Resolves with one innerHTML string per address,
 *   in the same order; rejects with the error reported to the vo callback, in
 *   which case later addresses are not visited.
 */
function findById(addresses) {
  return new Promise(function(resolve, reject) {
    vo(function * (addresses) {
      const nightmare = new Nightmare({
        show: true
      });
      const results = [];
      try {
        // Each yield completes before the next goto is issued, so the pages
        // are processed strictly one after another.
        for (const address of addresses) {
          results.push(yield nightmare
            .goto(address)
            .wait(2000)
            .evaluate(function() {
              return document.getElementsByTagName('html')[0].innerHTML;
            }));
        }
      } finally {
        // End the shared instance even when one of the pages failed.
        yield nightmare.end();
      }
      return results;
    })(addresses, function(err, res) {
      if (err) {
        reject(err);
        return;
      }
      resolve(res);
    });
  });
}

Of course, if you still wanted fresh Nightmare instances every time, you could move the constructor call and the call to .end() inside of the for loop.

Ross
  • 2,448
  • 1
  • 21
  • 24
1

You are doing almost everything correctly; all you need to do now is sequentialize the promises, i.e. chain them. You can take a look at this answer.

Just change your code to use reduce:

// index.js

// Chain the lookups so that each one starts only after the previous lookup
// and its logging step have completed.
urls.reduce(function(promise, url) {
  return promise.then(function() {
    // findById is provided by UrlProcessing and takes the address to load,
    // so the current url is what gets passed in.
    return UrlProcessing.findById(url);
  }).then(function(results) {
    console.log(results);
  });
}, Promise.resolve())
  .then(function() {
    console.log('All done');
  });

In a more condensed ES6 form, it would be:

// Same sequential chain with arrow functions; findById is called on
// UrlProcessing with the current url.
urls.reduce((p, url) => p.then(() => UrlProcessing.findById(url)).then(r => console.log(r)), Promise.resolve())
  .then(() => console.log('All done'));
Community
  • 1
  • 1
mido
  • 24,198
  • 15
  • 92
  • 117