0

EDIT: Updated with full code.

I'm writing javascript code in node.js that is meant to loop through an array of data that I have scraped from the web. The purpose of the code is to:

1) Check the player table to see if that player's name exists in a record, and 2) If that player doesn't exist, add him to the database.

Here is my code:

// Scraper script: fetches a basketball-reference.com daily-leaders page, stores the text
// of the matched table cells in `data`, then queries the `player` table once per player name.
var cheerio = require('cheerio');
var request = require('request');
var data = [];
var mysql = require('mysql');

// Connection settings for a MySQL server on localhost.
var connection = mysql.createConnection({
    host: 'localhost',
    user: 'root',
    password: 'blahblah',
    database: 'test',
    port: 3306 });

connection.connect();

request('http://www.basketball-reference.com/friv/dailyleaders.cgi?month=12&day=28&year=2014', function(err, response, body){
    // Only parse the page when the request succeeded with HTTP 200; other outcomes are ignored.
    if(!err && response.statusCode ==200){
        var $ = cheerio.load(body);
        // Select `td` elements in the context of `tbody` and append each cell's text to `data`.
        $('td', 'tbody').each(function(){
            var url = $(this).attr('href'); // NOTE(review): `url` is never read afterwards
            var text = $(this).text();
            data.push(text);
        });



        // Walk `data` in steps of 26 starting at index 1, issuing one SELECT per step.
        // NOTE(review): `i` is not declared in the visible code, so it is an implicit global
        // shared by every callback created in this loop. Each callback reads `i` when it is
        // invoked, not when it was created.
        // NOTE(review): data[i] is concatenated directly into the SQL text without escaping.
        for(i=1;i<data.length;i+=26){
            var query = connection.query(
            "SELECT * FROM player WHERE name = '"+data[i]+"'",function(err, result, fields) {
                if (err) throw err;
                // An empty result set means no row in `player` matched the name.
                if(result.length==0){
                    //var insertQuery = "INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = '"+data[i+1]+"'),1,'"+data[i]+"');";
                    console.log(i);

                }
            });
        }

    }
});

Note that I commented out my INSERT query for testing purposes, but the player name is located in data[i] while the team name is in data[i+1].

To test, I am just printing i to the console, and every loop it prints out the max value of i, which is 3225. However, that console.log command is INSIDE the for loop, so shouldn't it print each value of i as it is incremented?

fullOfQuestions
  • 453
  • 1
  • 11
  • 25

1 Answer

2

This is similar to this question: JavaScript closure inside loops – simple practical example

That answer explains why things are acting the way they are.

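This happens with closures: each callback references the variable `i` itself rather than a copy of its value at the time the callback was created. By the time the callbacks are executed the loop has already finished, so every callback sees the final (maximum) value of `i`.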

Here's an example of what your code is doing (bad with callbacks):

// Demonstrates the closure pitfall: builds four callbacks that all close over the
// same function-scoped `i`. Each callback reads `i` only when it is called, so every
// one of them alerts 100 (the value `i` holds once the loop has finished).
function a(){
  var pending = [];

  // `var` is function-scoped, so there is a single `i` for the whole loop.
  for (var i = 0; i < 100; i += 25) {
    var showIndex = function(){
      alert(i);
    };
    pending.push(showIndex);
  }

  return pending;
}


// Build the callbacks, then invoke each one in order.
// forEach is used instead of `for (f in ...)`: the original loop variable `f` was
// never declared (an implicit global, which throws in strict mode / ES modules),
// and for...in is meant for object keys rather than array elements.
var callbackArray = a();
callbackArray.forEach(function(callback){
  callback();
});

Here's what it should be doing:

// Returns a callback that alerts the value of `i` passed to this call.
// Every call to createFunction gets its own `i` parameter, so each returned
// callback keeps its own value instead of sharing one variable.
function createFunction(i){
   var showCaptured = function(){
     alert(i);
   };
   return showCaptured;
}

// Corrected version of the example: each callback is produced by createFunction(i),
// which receives the current value of `i` as an argument, so the callbacks are built
// from 0, 25, 50 and 75 respectively rather than sharing the loop variable.
function a(){
  var pending = [];

  for (var i = 0; i < 100; i += 25) {
    var callback = createFunction(i);
    pending.push(callback);
  }

  return pending;
}


    // Build the callbacks, then invoke each one in order.
    // forEach is used instead of `for (f in ...)`: the original loop variable `f` was
    // never declared (an implicit global, which throws in strict mode / ES modules),
    // and for...in is meant for object keys rather than array elements.
    var callbackArray = a();
    callbackArray.forEach(function(callback){
      callback();
    });

For your fix, you should do something like this:

// outside of the code you posted, as a separate function.
// Builds the callback passed to connection.query for one player lookup.
//   data - the scraped values; the commented-out INSERT below reads data[i] and data[i+1]
//   i    - the index captured for this particular lookup
// The returned callback rethrows any error it is given, and logs `i` when the
// result has length 0 (i.e. nothing matched the query).
function createPlayerSelectionCallback(data,i){
    function onPlayerLookup(err, result, fields) {
        if (err) throw err;

        var playerMissing = result.length == 0;
        if (playerMissing) {
            //var insertQuery = "INSERT INTO player (provider_id, team_id, position_id, name) VALUES (1, (SELECT id FROM team WHERE slug = '"+data[i+1]+"'),1,'"+data[i]+"');";
            console.log(i);
        }
    }

    return onPlayerLookup;
}


// this for loop stays in the same place, just modified to use the new function.
 // Walk `data` in steps of 26 starting at index 1, running one lookup per step.
 // `i` is declared with `var` so it is no longer an implicit global, and the name
 // is passed as a `?` placeholder value so the mysql driver escapes the scraped
 // text instead of it being concatenated into the SQL string.
 for (var i = 1; i < data.length; i += 26) {
    var query = connection.query(
        "SELECT * FROM player WHERE name = ?",
        [data[i]],
        // The factory receives the current `i`, so each callback keeps its own index.
        createPlayerSelectionCallback(data, i));
 }
Community
  • 1
  • 1
ps2goat
  • 8,067
  • 1
  • 35
  • 68
  • Callbacks are a whole new concept for me, so I will have to do some reading up, but this was very helpful, thanks for taking the time! – fullOfQuestions Dec 30 '14 at 22:19