-1

So I've been playing around with web scraping. I have this mostly working, but when I try outputting the array elements they are all undefined (see the output at the bottom). I feel like it may be related to a timing issue; I say that because the prices are never in the same order, as if the requests take different amounts of time to be answered. If that is the issue, how do I get them in sync? Thanks!

// Scrapes a price for each part number and appends "PartNumber,Price" rows
// to post.csv, logging each pair to the console as it goes.
const request = require('request');
const cheerio = require('cheerio');
const fs = require('fs');
const writeStream = fs.createWriteStream('post.csv');

// Part numbers to look up; each one is appended to `url` to form the request URL.
var numbers = ["2202917",
"2205112",
"3514318",
"3514561",
"3585503",
"3585704",
"3610075",
"5132753",
"5247359",
"5247360"];

// `y` is a single variable shared by the loop and by every request callback below.
var y =0;
var partNumber1 ="";
var price1 ="";

// CSV header row.
writeStream.write('PartNumber,Price \n');

// NOTE(review): `url` is not defined anywhere in this snippet — presumably it is
// declared elsewhere; confirm.
//
// request() is asynchronous: the loop only *starts* the requests. The callbacks
// run later, after the loop has finished and `y` has already reached
// numbers.length, so numbers[y] inside the callback is undefined (see the
// output below). Responses can also arrive in any order.
for (y=0; y < numbers.length; y++){
  request(url + numbers[y], function(error, response, html) {
      // Failed requests (an error or a non-200 status) are skipped without any message.
      if (!error && response.statusCode == 200) {
      const $ = cheerio.load(html);
      // Take the text of the elements matching `.price` and remove every run of
      // two or more whitespace characters.
      price1 =$('.price').text().replace(/\s\s+/g,'');
      // Reads the shared `y` at callback time, not the value it had when the
      // request was issued.
      partNumber1 =numbers[y];
      console.log(partNumber1,' ',price1);
      writeStream.write(`${partNumber1}, ${price1} \n`);
    }
  });
}

Output

C:\Program Files\nodejs>node mp2.js
undefined ' ' '$8.99'
undefined ' ' '$127.88'
undefined ' ' '$43.22'
undefined ' ' '$27.38'
undefined ' ' '$21.41'
undefined ' ' '$41.46'
undefined ' ' '$21.57'
undefined ' ' '$47.99'
undefined ' ' '$1267.30'
undefined ' ' '$22.04'

C:\Program Files\nodejs>
Luca Kiebel
  • 9,790
  • 7
  • 29
  • 44
Garrett Pe
  • 31
  • 1
  • 3
  • 10

1 Answer

0

You are making a request() call, which is asynchronous, so the callbacks will not necessarily run in order. Here is a simpler example which I will use to explain the alternatives.

I use setTimeout with a different delay for each item to mimic the request call.

// Part numbers used by the examples below; each one stands in for a separate request.
var numbers = [
  "2202917",
  "2205112",
  "3514318",
  "3514561",
  "3585503",
  "3585704",
  "3610075",
  "5132753",
  "5247359",
  "5247360",
];

/*
for (var y=0; y < numbers.length; y++) {
  setTimeout(function(error, response, html) {

    // The problem here is that the callback refers to the closure variable y. The loop keeps changing y, so by the time the callback runs, y no longer has the value it had when setTimeout was called.
    // In this particular case, y === numbers.length by the time the callbacks run, so numbers[y] is undefined; hence the undefined output.

    console.log(numbers[y]);

  }, y % 100);
}
*/

// Alternative 1
// A better way would be like this
// Each iteration would hand the current element to doRequest as an argument,
// so the delayed callback does not depend on the loop variable y.
// The call is left commented out, so as written this loop does nothing;
// uncomment it to run Alternative 1.
for (var y=0; y < numbers.length; y++) {
  // doRequest(numbers[y]);
}

/**
 * Simulates one asynchronous request for a single part number.
 *
 * The part number arrives as a function argument, so the delayed callback
 * logs the value this call was made with rather than a shared loop variable.
 *
 * @param {string} partNumber - Part number to log; its value modulo 100 is
 *   used as the timer delay in milliseconds.
 * @returns {void}
 */
function doRequest(partNumber) {
  // Vary the delay per part so the callbacks finish at different times.
  const delayMs = partNumber % 100;

  // This callback stands in for the request library's completion callback.
  const onDone = () => {
    console.log(partNumber);
  };

  setTimeout(onDone, delayMs);
}

// end: Alternative 1

// Alternative 2
// in case you want the requests to run sequentially, one after another, in the original order
/**
 * Processes every entry of `numbers` strictly one after another: the next
 * doPromiseRequest call is only started once the previous promise has resolved.
 *
 * @returns {Promise<void>} Resolves after the last part number has been handled.
 */
async function doSomething() {
  for (const partNumber of numbers) {
    // Awaiting inside the loop is what keeps the calls in order.
    await doPromiseRequest(partNumber);
  }
}

/**
 * Promise-based version of the simulated request: logs the part number after
 * a short delay and then resolves.
 *
 * @param {string} partNumber - Part number to log; its value modulo 100 is
 *   used as the timer delay in milliseconds.
 * @returns {Promise<void>} Resolves (with no value) once the part number has
 *   been logged. This promise is never rejected.
 */
function doPromiseRequest(partNumber) {
  const delayMs = partNumber % 100;

  return new Promise((resolve) => {
    // This callback stands in for the request library's completion callback.
    setTimeout(() => {
      console.log(partNumber);
      resolve();
    }, delayMs);
  });
}

// Start the sequential run. The returned promise is not awaited or given a
// rejection handler here.
doSomething();

// end Alternative 2

/*

another alternative is not calling a separate function, but executing an anonymous function in the loop itself. It is not clean at all and not recommended.

for (var y = 0; y < numbers.length; y++) {
  (function (y) {
    setTimeout(function(error, response, html) {

      // unlike a plain loop over a shared y, y here is the IIFE's own parameter, so each callback sees the value y had when it was scheduled
      console.log(numbers[y]);

    }, y % 100);
  })(y);
}

*/
Jeevan MB
  • 148
  • 1
  • 11