2

I'm pulling data from a website. The data being pulled is paginated. When making a request to the first page, a response is received that contains a nextCursor. This cursor must be used to get the results for page 2. Each page request requires a nextCursor.

I'm struggling to implement this using Promises because I can't find any way to loop. This is how I assume it works without Promises (not tested, but it demonstrates what I'm trying to do):

// Untested sketch of the intended flow: request a page, read its nextCursor,
// and repeat until a response carries no nextCursor.
let nextCursor = argv.initalCursor

do {
  // NOTE(review): assumes request() invokes its callback asynchronously, as
  // Node-style callback APIs typically do. If so, nextCursor has not been
  // updated yet when the `while` test below runs — confirm before relying on
  // this loop shape.
  let r = request('http://example.com/items.php?cursor=' + nextCursor, function(err, resp, body) {
    if(err) throw new Error(err)

    // Do something with the data

    // The cursor for the following page comes from the current response.
    nextCursor = JSON.parse(body)['nextCursor']
  })
} while(nextCursor)

As you can see the number of iterations in the loop is unknown. It will loop until there isn't a nextCursor in the response.

What I want to do is implement this using Promises. Except I don't know how to create a loop that works in a similar way, because each request is dependent on the last one.

How would this work using promises?

Here's my current solution, which fails at `return self.cursorRequest(...)`: the script just halts execution.

'use strict'

let Promise = require('bluebird')
let _ = require('lodash')

/**
 * Scrapes guest IDs for an event through an authenticated session.
 *
 * `session` must expose `uid` and a Node-style `request(url, callback)` whose
 * callback is invoked with `(err, resp, body)`.
 */
class Event {
  constructor(session) {
    this.session = session
    // IDs collected by the most recent successful members() call.
    this.scrapedIDs = []
  }

  /**
   * Pull the guest IDs and the paging cursor for one guest type out of a
   * decoded guest-list response.
   *
   * @param {Object} json - Decoded response; reads `payload[guestType]`.
   * @param {string} guestType - Key of the guest bucket to read.
   * @returns {{ids: Array, cursor: *}} De-duplicated IDs of this page and the
   *   cursor for the following page (falsy when there is none).
   */
  parseGuestsAndCursor(json, guestType) {
    let bucket = json['payload'][guestType]
    let ids = bucket['sections'][2][1].map(function(user) {
      return user['uniqueID']
    })

    return {
      ids: _.uniq(ids),
      cursor: bucket['cursor']
    }
  }

  /**
   * Issue one session request and decode its JSON body.
   *
   * Settles exactly once: rejects with the transport error, rejects with the
   * parse error when the body is missing or malformed, otherwise resolves with
   * the decoded object.
   *
   * @param {string} url - Fully built request URL.
   * @returns {Promise<Object>} Decoded response body.
   */
  requestJSON(url) {
    let self = this

    return new Promise(function(resolve, reject) {
      self.session.request(url, function(err, resp, body) {
        // Return after rejecting so a failed request never reaches the parser.
        if(err) return reject(err)

        try {
          // The first 9 characters are skipped before parsing (presumably a
          // `for (;;);` guard prefix - confirm against a real response).
          resolve(JSON.parse(body.substring(9)))
        } catch(parseErr) {
          reject(parseErr)
        }
      })
    })
  }

  /**
   * Fetch the guest-list page identified by `cursor`.
   *
   * @param {*} cursor - Cursor taken from the previous page.
   * @param {*} eventID - Event identifier.
   * @param {string} guestType - Guest bucket to page through.
   * @returns {Promise<{ids: Array, cursor: *}>} IDs of the page and the next cursor.
   */
  cursorRequest(cursor, eventID, guestType) {
    let self = this
    let url = `https://example.com/events/typeahead/guest_list/?event_id=${eventID}&tabs[0]=${guestType}&order[${guestType}]=affinity&bucket_schema[${guestType}]=friends&cursor[${guestType}]=${cursor}&dpr=1&__user=${self.session.uid}&__a=1`

    return self.requestJSON(url).then(function(json) {
      return self.parseGuestsAndCursor(json, guestType)
    })
  }

  /**
   * Collect the IDs of every guest of `guestType`, following cursors until a
   * page comes back without one.
   *
   * @param {*} eventID - Event identifier.
   * @param {*} limit - Accepted for interface compatibility; not applied.
   * @param {string} guestType - Guest bucket to collect.
   * @returns {Promise<Array>} All IDs in page order; rejects on the first
   *   request or parse failure. The result is also stored on `scrapedIDs`.
   */
  members(eventID, limit, guestType) {
    let self = this
    let ids = []
    let url = `https://example.com/events/typeahead/guest_list/?event_id=${eventID}&tabs[0]=watched&tabs[1]=going&tabs[2]=invited&order[declined]=affinity&order[going]=affinity&order[invited]=affinity&order[maybe]=affinity&order[watched]=affinity&order[ticket_purchased]=affinity&bucket_schema[watched]=friends&bucket_schema[going]=friends&bucket_schema[invited]=friends&bucket_schema[ticket_purchased]=friends&dpr=1&__user=${self.session.uid}&__a=1`

    // Each page depends on the previous page's cursor, so the "loop" is a
    // function that returns the promise for the next page from its own `then`.
    function collect(guests) {
      ids = ids.concat(guests.ids)

      if(!guests.cursor) return ids

      return self.cursorRequest(guests.cursor, eventID, guestType).then(collect)
    }

    return self.requestJSON(url)
      .then(function(json) {
        return self.parseGuestsAndCursor(json, guestType)
      })
      .then(collect)
      .then(function(allIDs) {
        self.scrapedIDs = allIDs
        return allIDs
      })
  }
}

module.exports = Event
BugHunterUK
  • 8,346
  • 16
  • 65
  • 121
  • I don't see any promises there at all. – T.J. Crowder Aug 22 '16 at 09:26
  • @T.J.Crowder There isn't, because I am not sure how to implement what I am trying to do with promises. That's why I am asking, how would this sort of thing be done with promises? I don't require a solution, just a pointer to any resources that can help. – BugHunterUK Aug 22 '16 at 09:27
  • You want the promise to wait until all the data is retrieved before resolving? – T.J. Crowder Aug 22 '16 at 09:28
  • Have you had a look at [Correct way to write loops for promise](http://stackoverflow.com/q/24660096/1048572)? – Bergi Aug 23 '16 at 19:31
  • @Bergi Yeah I did see that. But, because the next request depends on data from the last I didn't find it useful. Whereas with the solution I arrived at using recursion I was able to pass in the data I needed back to the function. – BugHunterUK Aug 23 '16 at 19:37
  • @Bergi here's the solution I created that works fine: https://gist.github.com/JamesTheHacker/5faa7bd4ef17e52d029210473b8e4f44 – BugHunterUK Aug 23 '16 at 19:44
  • @BugHunterUK: you should [promisify](http://stackoverflow.com/q/22519784/1048572) only `this.session.request` and then chain everything from it with `then` callbacks. That way you can drop the `try`/`catch`, and remove the code repetition between `cursorRequest` and `members`. Oh, and you definitely should [avoid the promise constructor antipattern](http://stackoverflow.com/q/23803743/1048572) in `members`! – Bergi Aug 23 '16 at 19:51
  • @Bergi Thanks, that's some good advice I appreciate it. – BugHunterUK Aug 23 '16 at 19:52

2 Answers2

3

Since the process is asynchronous, you don't use a looping construct at all; you just use a function that calls itself (indirectly via another function).

In your case, since you've said you want to implement this with promises, here's how you do that (it's easiest to express in code):

// Promise that settles once the whole cursor chain has been walked: it
// resolves when a response carries no nextCursor and rejects on the first
// request error or unparseable body. `argv` and `request` come from the
// surrounding script.
var p = new Promise(function(resolve, reject) {
    let nextCursor = argv.initialCursor;

    doRequest();

    // Request the page for the current cursor; handleResult decides what's next.
    function doRequest() {
        request('http://example.com/items.php?cursor=' + nextCursor, handleResult);
    }

    function handleResult(err, resp, body) {
        if (err) {
            // Got an error, reject the promise
            reject(err);
            return;
        }

        try {
            // Do something with the data

            // Next?
            nextCursor = JSON.parse(body)['nextCursor'];
        } catch (parseErr) {
            // A throw inside this callback would escape the promise and leave
            // it pending forever, so turn it into a rejection instead.
            reject(parseErr);
            return;
        }

        if (nextCursor) {
            // Yup, do it
            doRequest();
        } else {
            // No, we're done
            resolve(/*...resolution value here...*/);
        }
    }
});

(The ES2015 version looks basically the same.)

T.J. Crowder
  • 1,031,962
  • 187
  • 1,923
  • 1,875
  • This is not working because I get `Maximum call stack size exceeded` – BugHunterUK Aug 22 '16 at 11:33
  • @BugHunterUK: You must have a typo in the version you're running. You haven't accidentally put `()` after `handleResult` in the `request(..., handleResult);` line, by chance? Important that they *not* be there, we want to pass the function reference, not call it. – T.J. Crowder Aug 22 '16 at 11:34
1

One other way to do this is to promisify the asynchronous function you're using, and build upon that.

The benefit to this approach is that the functionality is modularized, so you can reuse the requestPromise() function if you want to make other types of requests with promises:

// Cursor for the first page, read from `argv` (defined outside this snippet).
// NOTE(review): the property is spelled `initalCursor` here - confirm it
// matches the actual option name.
let nextCursor = argv.initalCursor

/**
 * Promise wrapper around the callback-style `request(url, cb)`.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<{resp: *, body: *}>} Resolves with the response object and
 *   body; rejects with the request's error.
 */
function requestPromise(url) {
    return new Promise(function (resolve, reject) {
        request(url, function (err, resp, body) {
            if (err) {
                // Pass an existing Error through untouched so its type, stack
                // and extra properties survive; only wrap non-Error values.
                reject(err instanceof Error ? err : new Error(err));
                return;
            }

            resolve({ resp: resp, body: body });
        });
    });
}

/**
 * Fetch the page for `cursor`, then keep following each response's
 * `nextCursor` until a response has none.
 *
 * @param {*} cursor - Cursor of the page to fetch.
 * @returns {Promise<undefined>} Settles after the last page has been handled;
 *   rejects if a request or a body parse fails.
 */
function queryCursor(cursor) {
    const pageUrl = 'http://example.com/items.php?cursor=' + cursor;

    return requestPromise(pageUrl).then((page) => {
        // do something with page

        const following = JSON.parse(page.body).nextCursor;

        // Returning the recursive call chains the next request onto this one;
        // returning undefined ends the chain.
        return following ? queryCursor(following) : undefined;
    });
}

// Start the crawl from the initial cursor. The returned promise settles once a
// response without a nextCursor is reached, and rejects on the first failure.
queryCursor(nextCursor)
    .catch(function (err) {
        // deal with err (placeholder: handle or report the failure here)
    });
JLRishe
  • 99,490
  • 19
  • 131
  • 169
  • I've updated my question with my current solution. I can't seem to get it to work. When I `return` it just halts the script. – BugHunterUK Aug 22 '16 at 10:49