4

I have 13.000 documents from MongoDB where I have address line + Postcode, im trying to make a request for each of them to Google's geocoding API and get LAT + LONG for them so I can have them appear dynamically on a map search.

I have designed the following for of loop and I'm testing with 10 items at a time but due to the async nature of both the writing to DB call and calling the API, the LAT/LONG coordinates from the HTTPS requests ends up being undefined/unavailable to knex's INSERT and the loop seems to keep going on and on...

Is it possible to write this in a blocking way? So the for loop doesn't go to the next item unless both promises have been resolved?

The code:

let results = [];
  await forLoop();

  async function forLoop() {
    for (job of allJobs) {
      const geoData = await getGeoData(
        job.site.addressLine1,
        job.site.postcode
      );
      const dbResult = await addToDb(geoData);
      results.push(dbResult);

      async function getGeoData(addressLine1, postcode) {
        const friendlyAddress = encodeURIComponent(addressLine1 + ' ' + postcode);
        https
          .get(
            'https://maps.googleapis.com/maps/api/geocode/json?key=<API_KEY_IGNORE_THIS_ITS_HARDCODED_IN_MY_REAL_CODE>&address=' +
              friendlyAddress,
            resp => {
              let data = '';
              resp.on('data', chunk => {
                data += chunk;
              });
              // The whole response has been received. Print out the result.
              resp.on('end', () => {
                console.log(JSON.parse(data).explanation);
                let result = JSON.parse(data);
                return result;
              });
            }
          )
          .on('error', err => {
            console.log('Error: ' + err.message);
          });
      }

      async function addToDb(geoData) {
        try {
          await knex('LOCATIONS')
            .returning('*')
            .insert({
              UPRN: job.site.UPRN,
              lat: geoData.results[0].geometry.location.lat,
              lng: geoData.results[0].geometry.location.lng
            });
        } catch (err) {
          err.name = 'database';
          next(err);
        }
      }
    }
  }
  res.send(results);

I've made sure the codebase has no nulls and have tested both the api call and the database call to make sure they work in isolation.

SebastianG
  • 8,563
  • 8
  • 47
  • 111
  • Unrelated to your question, but you cannot simply concatenate text into a URL. You must escape it with `encodeURIComponent`, or things will break. – Brad Mar 29 '19 at 17:02
  • @Brad ahh, I'm used for the framework to do it for me! Thanks for that, I have fixed it and added the fixed code here. – SebastianG Mar 29 '19 at 17:08
  • I'm an old-school VanillaJS guy, so I have a terrible time reading these, however, there's a good chance that using a linter would help. A linter will warn you to never define a function within a loop. Also by using `.forEach` instead of `for`, not only will it be insignificantly faster (a huge selling point for a lot of people for some reason), but you'll keep scope between iterations. Lastly, try getting rid of all the async / await and just using Promise.all and Promises and jobs.pop() until there are none. It's a lot easier to reason about async code when it's not abstracted away. – coolaj86 Mar 30 '19 at 15:13
  • Also `data += chunk` should be `data += chunk.toString()` or `data.push(chunk)` and later `Buffer.concat()` – coolaj86 Mar 30 '19 at 15:14

2 Answers2

1

Just use promises (or callbacks)

I know that everyone hates JavaScript and so these anti-idiomatic transpilers and new language "features" exist to make JavaScript look like C# and whatnot but, honestly, it's just easier to use the language the way that it was originally designed (otherwise use Go or some other language that actually behaves the way that you want - and is more performant anyway). If you must expose async/await in your application, put it at the interface rather than littering it throughout.

My 2¢.

I'm just going to write some psuedo code to show you how easy this can be:

function doItAll(jobs) {
  var results = [];

  function next() {
    var job = jobs.shift();
    if (!job) {
      return Promise.resolve(results);
    }
    return makeRequest(job.url).then(function (stuff) {
      return updateDb().then(function (dbStuff) {
        results.push(dbStuff);
      }).then(next);
    });
  }

  return next();
}

function makeRequest() {
  return new Promise(function (resolve, reject) {
    var resp = http.get('...', {...});
    resp.on('end', function () {
      // ... do whatever
      resolve();
    });
  });
}

Simple. Easy to read. 1:1 correspondence between what the code looks like and what's actually happening. No trying to "force" JavaScript to behave counter to the way it was designed.

The longer you fight learning to understand async code, the longer it will take to to understand it.

Just dive in and learn to write JavaScript "the JavaScript way"! :D

coolaj86
  • 74,004
  • 20
  • 105
  • 125
  • Thanks, have used your example to make a properly working script that's running nice and synchronous. I have added that here as an answer for people that stumble across this in the future but marked your answer as the real one. – SebastianG Apr 01 '19 at 11:17
0

Here's my updated function that works properly and synchronously, getting the data one by one and adding it to the database before moving to the next one.

I have made it by customizing @coolAJ86 answer and I've marked that as the correct one but thought it would be helpful for people stumbling across this thread to see my final, working & tested version.

var geoApiUrl =
    'https://maps.googleapis.com/maps/api/geocode/json?key=<<MY API KEY>>&address=';

  doItAll(allJobs)

  function doItAll(jobs) {
    var results = [];
    var errors = [];

    function nextJob() {
      var job = jobs.shift();
      if (!job) {
        return Promise.resolve(results);
      }
      var friendlyAddress =
        geoApiUrl +
        encodeURIComponent(job.addressLine1 + ' ' + job.postcode);

      return makeRequest(friendlyAddress).then(function(result) {
        if((result.results[0] === undefined) || (result.results[0].geometry === undefined)){
          nextJob();
        } else { 
        return knex('LOCATIONS')
          .returning('*')
          .insert({
            UPRN: job.UPRN,
            lat: result.results[0].geometry.location.lat,
            lng: result.results[0].geometry.location.lng,
            title: job.title,
            postcode: job.postcode,
            addressLine1: job.addressLine1,
            theo_id: job.clientId
          })
          .then(function(data) {
            // console.log('KNEX CALLBACK COMING')
            // console.log(data[0])
            console.log(data[0]);
            results.push(data[0]);
            nextJob();
          })
          .catch(function(err) {
            console.log(err);
            errors.push(job);
          });
        }
      });
    }
    return nextJob();
  }

  function makeRequest(url) {
    return new Promise(function(resolve, reject) {
      https
        .get(url, resp => {
          let data = '';
          resp.on('data', chunk => {
            data += chunk;
          });
          // The whole response has been received. Print out the result.
          resp.on('end', () => {
            let result = JSON.parse(data);
            resolve(result);
          });
        })
        .on('error', err => {
          console.log('Error: ' + err.message);
          reject(err);
        });
    });
  }
SebastianG
  • 8,563
  • 8
  • 47
  • 111