0

Why does my var i in for loop change from 0 to 1?

This line: for for (var i = 0; i < siteDatabase.length; i++) increments 0 to 1 ?in the first loop? which then causes siteDatabase[i] to access element at index 1 instead of 0. I thought i++ increment on the 2nd loop?

The result is a error by method call (push) to an undefined element.

var newsUpdates = {};
var siteDatabase = [
    "http://example.to"
];

var scraperjs = require('scraperjs');
for (var i = 0; i < siteDatabase.length; i++) {
    news[siteDatabase[i]] = [];

    scraperjs.StaticScraper.create(siteDatabase[i])
        .scrape(function($) {
            return $(".lang_English").map(function() {
                return $(this).children('td').eq(1).children(
                    'a').last().text();
            }).get();
        }, function(news) {
            for (var x = 0; x < news.length; x++) {
                if (news[x] == '') {
                    news.splice(x, 1);
                }
            }
            for (var x = 0; x < news.length; x++) {
                // i in siteDatabase[i] is not 0, but 1??
                newsUpdates[siteDatabase[i]].push({
                    "title": news[x]
                });
                // TypeError: Cannot call method 'push' of undefined
            }
            console.log(newsUpdates);
        })
}

The problem is on the area I comment between the codes.

rawnuggets
  • 139
  • 1
  • 2
  • 6
  • possible duplicate of [JavaScript closure inside loops – simple practical example](http://stackoverflow.com/questions/750486/javascript-closure-inside-loops-simple-practical-example) – JLRishe Mar 16 '15 at 14:23

2 Answers2

4

The function you're passing into .scrape has an enduring reference to the i variable, not a copy of it as of when the function was created. So it uses the value of i as of when the function runs, which is (presumably) later after the loop has finished, when i's value has changed.

In this case, I'd probably use a builder function to build the callback so the callback closes over something that doesn't change:

var newsUpdates = {};
var siteDatabase = [
    "http://example.to"
];

var scraperjs = require('scraperjs');
for (var i = 0; i < siteDatabase.length; i++) {
    news[siteDatabase[i]] = [];

    scraperjs.StaticScraper.create(siteDatabase[i])
        .scrape(function($) {
            return $(".lang_English").map(function() {
                return $(this).children('td').eq(1).children(
                    'a').last().text();
            }).get();
        }, buildCallback(i))
}

function buildCallback(index) {
    return function(news) {
        for (var x = 0; x < news.length; x++) {
            if (news[x] == '') {
                news.splice(x, 1);
            }
        }
        for (var x = 0; x < news.length; x++) {
            newsUpdates[siteDatabase[inindex]].push({
                "title": news[x]
            });
        }
        console.log(newsUpdates);
    };
}

There, the function we return out of buildCallback closes over the index argument, whose value never changes. Then we pass i into it so build our callback.

T.J. Crowder
  • 1,031,962
  • 187
  • 1,923
  • 1,875
  • Brilliant! That worked, but why do I get the same result if I change i to j? If .scrape is using var i, then var j wouldn't be affected, but it is...? – rawnuggets Mar 16 '15 at 14:37
  • I meant that if I change the var i counter in the for loop to a var j, why is the var j counter still affected even though it's not i. – rawnuggets Mar 16 '15 at 15:56
  • @combatmath: If you really replaced all of the `i` with `j`, it would be affected too. – T.J. Crowder Mar 16 '15 at 15:57
0

Since siteDatabase appears to be an array, an alternative way to resolve this is to use siteDatabase.forEach, which will give you a separate closure variable for each iteration. I prefer this approach to the builder function approach because it is more straightforward and easy to follow:

var newsUpdates = {};
var siteDatabase = [
    "http://example.to"
];

var scraperjs = require('scraperjs');

siteDatabase.forEach(function (site) {
    scraperjs.StaticScraper.create(site)
    .scrape(function($) {
        return $(".lang_English").map(function() {
            return $(this).children('td').eq(1)
                          .children('a').last().text();
        }).get();
    }, function(news) {
        newsUpdates[site] = news.filter(function (item) {
            return item != '';
        }).map(function (item) {
            return { title: item };
        });
        console.log(newsUpdates);
    });
});

You could further break this up into functions to make it cleaner and more expressive:

var newsUpdates = {};
var siteDatabase = [
    "http://example.to"
];

var scraperjs = require('scraperjs');

function scrapePageNewsItems($) {
    return $(".lang_English").map(function() {
        return $(this).children('td').eq(1)
                      .children('a').last().text();
    }).get();
}

function notBlank(item) {
    return item != '';
}

function convertNewsItem(item) {
    return { title: item };
}

function convertNewsItems(news) {
    return news.filter(notBlank).map(convertNewsItem);
}

siteDatabase.forEach(function (site) {
    scraperjs.StaticScraper.create(site)
    .scrape(scrapePageNewsItems, function(news) {
        newsUpdates[site] = convertNewsItems(news);
        console.log(newsUpdates);
    });
});
JLRishe
  • 99,490
  • 19
  • 131
  • 169