0

I am writing a Node.js app and I am having trouble returning the value of my web scrape to my main app.get function. The page gets scraped just fine and the results make it all the way to the `return` statement in my callback, but the value never actually reaches the caller. Any help would be appreciated!

EDIT: This needs to be a pure JavaScript solution that does not use jQuery.

In my server.js file I have this code:

// Evaluated once, outside the route handler: stores whatever
// helpers.scrapePage() hands back synchronously.
var machineDetails = helpers.scrapePage();

// Renders the 'index' view for '/' using the value stored above.
app.get('/', function (req, res) {
    var viewModel = {
        machineDetails: machineDetails,
        title: 'VIP IT Dashboard'
    };

    res.render('index', viewModel);
});

In a helpers.js file I have the following functions:

//Requires
httpntlm = require('httpntlm'),
cheerio = require('cheerio');


// Module-level variable; assigned inside the callback that scrapePage passes to callSite.
var userData;

// Requests the page through httpntlm and passes the inner HTML of the
// #assetcontent element to `callback` as its only argument.
// Nothing is returned from callSite itself.
function callSite(callback) {

    // Target URL and NTLM credentials for the request
    var requestOptions = {
        url: "http://URLthatIamScraping.com",
        username: 'username1',
        password: 'password1',
        domain: 'companyDomain'
    };

    httpntlm.get(requestOptions, function (err, res) {
        // On failure the error is returned from this handler only;
        // `callback` is never invoked in that case.
        if (err) {
            return err;
        }

        // Parse the response body so it can be queried by selector
        var page = cheerio.load(res.body);
        var assetHtml = page('#assetcontent').html();

        // Deliver the extracted HTML to the caller-supplied callback
        callback(assetHtml);
    });
}


exports.scrapePage = function(){

    return callSite(function(data) {

        //This is called after HTTP request finishes
        userData = data;

        //ISSUE: userData is not actually making it back to my server.js variable called "machineDetails"
        return userData;

    });
}
RandomDeduction
  • 565
  • 1
  • 5
  • 17

1 Answer

1

It's asynchronous, so you can't just return the value. The result has to be passed to a callback once the request has finished.

//Requires
httpntlm = require('httpntlm'),
cheerio = require('cheerio');


// Requests the page through httpntlm and reports the outcome through a
// Node-style callback:
//   callback(err)        when the request fails
//   callback(null, html) with the inner HTML of #assetcontent on success
function callSite(callback) {

    // Target URL and NTLM credentials for the request
    var requestOptions = {
        url: "http://URLthatIamScraping.com",
        username: 'username1',
        password: 'password1',
        domain: 'companyDomain'
    };

    function onResponse(err, res) {
        // Forward request errors to the caller instead of dropping them
        if (err) {
            return callback(err);
        }

        // Parse the response body so it can be queried by selector
        var page = cheerio.load(res.body);
        var assetHtml = page('#assetcontent').html();

        // First argument is null to signal that no error occurred
        callback(null, assetHtml);
    }

    httpntlm.get(requestOptions, onResponse);
}


// Expose callSite directly as scrapePage; callers pass a callback that receives (err, html).
exports.scrapePage = callSite;

Then you do:

// Route handler for '/': runs the scrape, then renders the 'index' view
// once the result (or an error) comes back.
app.get('/', function (req, res, next) {

    var onScraped = function (scrapeError, scrapedHtml) {
        // Pass a failed scrape on to next() rather than rendering
        if (scrapeError) {
            return next(scrapeError);
        }

        res.render('index', {
            machineDetails: scrapedHtml,
            title: 'VIP IT Dashboard'
        });
    };

    helpers.scrapePage(onScraped);
});
Ben Fortune
  • 31,623
  • 10
  • 79
  • 80
  • Excellent! I moved `helpers.scrapePage` outside of app.get so it wasn't called every time someone accessed `'/'`. Thanks so much! – RandomDeduction Oct 29 '14 at 16:17