17

I have two files, server.js and scrape.js; below are the code snippets as they currently stand.

server.js:

const scrape = require("./scrape");

/**
 * Runs the scraper once and prints whatever value it resolves with.
 *
 * @returns {Promise<void>} Settles after the scrape result has been logged.
 */
async function start() {
    console.log(await scrape.start());
}

// Entry point: kick off the scrape once; the returned promise is not awaited here.
start();

and scrape.js:

const cheerio = require("cheerio");
const request = require("request-promise");

// Fetches the page and collects the trimmed text of every
// ".some-class-in-html" element into an array.
// NOTE(review): assigned without const/let/var, so `go` is an implicit global.
go = async () => {

// `transform` passes the response body through cheerio.load, so the request
// promise resolves with the cheerio query function rather than the raw body.
const options = {
  uri: "http://www.somewebsite.com/something",
  transform: function(body) {
    return cheerio.load(body);
  }
};

// NOTE(review): this promise chain is neither returned nor awaited, so the
// promise produced by go() resolves to undefined regardless of what the
// chain below yields.
request(options)
  .then($ => {
    let scrapeTitleArray = [];
    $(".some-class-in-html").each(function(i, obj) {
      const data = $(this)
        .text()
        .trim();
      scrapeTitleArray.push(data);
    });
    // This return only sets the value of the .then() promise, not of go().
    return scrapeTitleArray;
  })
  .catch(err => {
    // Failures are logged only; nothing is rethrown.
    console.log(err);
  });
};

// Expose the scraper under the name `start`, which is what server.js calls.
module.exports = {
  start: go
};

When I spin up server.js, console.log(response) prints undefined, but I actually want it to print the array I've been pushing to. Can you see where I'm going wrong?

razki
  • 1,171
  • 2
  • 8
  • 16
  • Shouldn't "go" return a promise? – McMurphy Nov 17 '17 at 00:57
  • @McMurphy If i'm not mistaken, declaring it as an async function automatically returns a promise? – razki Nov 17 '17 at 00:58
  • 2
    `.each()` uses a "callback". You need to wrap that in a promise and resolve its result. Otherwise the code just skips straight to returning the empty array. You need to `return` the `request` result of course, but if you don't respect the callback then you're still going to have problems. – Neil Lunn Nov 17 '17 at 00:58
  • `go` is an [implicitly global variable](http://blog.niftysnippets.org/2008/03/horror-of-implicit-globals.html). Don't do that. – Bergi Nov 17 '17 at 01:28
  • Why do you define `go` as an `async` function but then nowhere use `await`? – Bergi Nov 17 '17 at 01:29

2 Answers

29

You need to return something from your async function (a return inside a then does not return from the main function). Either a promise or something you await-ed.

Also, make sure to declare your go variable to avoid leaking it into the global space.

/**
 * Scrapes the page and gathers the trimmed text of every
 * ".some-class-in-html" element.
 *
 * @returns {Promise<string[]|undefined>} The collected strings, or undefined
 *   when the request fails (the error is logged rather than rethrown).
 */
const go = async () => {
  // The transform hook hands the response body to cheerio, so the request
  // promise resolves with a query function instead of raw HTML.
  const requestOptions = {
    uri: "http://www.somewebsite.com/something",
    transform: body => cheerio.load(body)
  };

  // Walks the matched elements and records each one's trimmed text.
  const collectTitles = $ => {
    const titles = [];
    $(".some-class-in-html").each((index, element) => {
      titles.push($(element).text().trim());
    });
    return titles;
  };

  // Returning the chain is what lets callers receive the scraped array.
  return request(requestOptions)
    .then(collectTitles)
    .catch(err => {
      console.log(err);
    });
};

Since you are using an async function, you might want to take advantage of the await syntax also.

/**
 * Scrapes the page and gathers the trimmed text of every
 * ".some-class-in-html" element, using await instead of a .then() chain.
 *
 * @returns {Promise<string[]|undefined>} The collected strings, or undefined
 *   when the request or parsing fails (the error is logged, not rethrown).
 */
const go = async () => {

  const options = {
    uri: "http://www.somewebsite.com/something",
    // Pass the response body through cheerio so the awaited value is a
    // query function rather than raw HTML.
    transform: function(body) {
      return cheerio.load(body);
    }
  };

  try {
    const $ = await request(options);
    // The array must be declared before use: pushing to an undeclared name
    // throws a ReferenceError, which the catch below would log and swallow,
    // leaving the function resolving to undefined.
    const scrapeTitleArray = [];
    $(".some-class-in-html").each(function(i, obj) {
      const data = $(this)
        .text()
        .trim();
      scrapeTitleArray.push(data);
    });
    return scrapeTitleArray;
  }
  catch (err) {
    console.log(err);
  }
};
Alexander O'Mara
  • 58,688
  • 18
  • 163
  • 171
2

I believe your go function isn't returning any value.

You're calling request(options).then(...), but what follows from that promise is never returned by go. I recommend you add a return statement:

/**
 * Scrapes the page and gathers the trimmed text of every
 * ".some-class-in-html" element.
 *
 * @returns {Promise<string[]|undefined>} The collected strings, or undefined
 *   when the request fails (the error is logged rather than rethrown).
 */
// Declared with const so `go` is not an implicit global (a bare assignment
// throws a ReferenceError in strict mode and ES modules).
const go = async () => {

  const options = {
    uri: "http://www.somewebsite.com/something",
    transform: function(body) {
      return cheerio.load(body);
    }
  };

  // The key fix is the "return" here: it hands the promise chain's result
  // back to whoever awaits go().
  return request(options)
    .then($ => {
      const scrapeTitleArray = [];
      $(".some-class-in-html").each(function(i, obj) {
        const data = $(this)
          .text()
          .trim();
        scrapeTitleArray.push(data);
      });
      return scrapeTitleArray;
    })
    .catch(err => {
      console.log(err);
    });
};
Gershom Maes
  • 7,358
  • 2
  • 35
  • 55