1

I have this simple HTMLParser in Node.js using the http module:

// Fetches http://www.google.com/ over plain HTTP and tries to print the page's <title> text.
var http = require('http');
// Request settings: a GET for the root path on port 80.
var options = {
  hostname: 'www.google.com',
  port: 80,
  path: '/',
  method: 'GET'
};

// The callback receives the response object once the server starts responding.
var req = http.request(options, function(res) {
  // Deliver body data as UTF-8 strings.
  res.setEncoding('utf8');
  // This handler runs on every 'data' event and looks only at the chunk it was given,
  // so one line is logged per chunk received.
  res.on('data', function (chunk) {  
    var title1 = chunk.indexOf("<title>");  
    // title2 is computed but never used below.
    var title2 = chunk.indexOf("</title>"); 
    // 7 is the length of "<title>": keep everything after the opening tag.
    var titl = chunk.substring(title1 + 7);
    // Keep the text up to the closing tag.
    var result = titl.substring(0, titl.indexOf("</title>"));
    console.log("Title is : " + result);
  });
// Note: req.end() is called here inside the response callback, in addition to the call at the bottom of the script.
req.end();
});

// Report request-level errors.
req.on('error', function(e) {
  console.log('problem with request: ' + e.message);
});

// Finish sending the request.
req.end();

When I run it, it iterates more than once, so I get output like the following on the command line. The number of iterations varies, but it is always more than one.

Title is: Google
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:
 Title is:

Any help? Thanks in advance!

Vikaton
  • 2,227
  • 3
  • 14
  • 23
  • FWIW if your end goal is to have an actual usable parser, you're probably better off using something like [`cheerio`](https://github.com/cheeriojs/cheerio). – mscdex Nov 22 '14 at 00:47
  • see http://stackoverflow.com/a/7373003/1481489 for information on better ways to parse HTML – zamnuts Nov 22 '14 at 00:47

1 Answer

0

The `data` event may fire more than once while the response is still arriving. You need to append each incoming chunk (a string here, because `setEncoding('utf8')` was called; otherwise a Buffer) to your own buffer and parse it only when the response is complete. That is why it is referred to as a chunk - it is partial data.

// Requests the page, buffers the whole response body, and prints the text of
// its <title> element once the response is complete.
// Relies on `http` and `options` being defined earlier in the script.
var req = http.request(options, function(res) {
  res.setEncoding('utf8');
  var content = '';
  res.on('data', function (chunk) {
    content += chunk; // concatenate incoming data chunk to a response buffer
  });
  res.once('end', function() { // once the response has ended (it is complete)
    var openTag = "<title>";
    var start = content.indexOf(openTag);
    // Look for the closing tag only after the opening tag, and only if the
    // opening tag was found at all.
    var end = start === -1 ? -1 : content.indexOf("</title>", start + openTag.length);
    if (end === -1) {
      // Without this guard, a missing tag (indexOf returning -1) would make the
      // code slice from an arbitrary offset and print unrelated page content.
      console.log("Title not found");
      return;
    }
    var result = content.substring(start + openTag.length, end);
    console.log("Title is : " + result);
  });
});

You may also want to remove the response's `data` event handler once `end` fires:

// Named 'data' handler, so the same function reference can later be passed to removeListener.
// Appends each received chunk to `content`, which is expected to be declared in an
// enclosing scope (not shown in this fragment).
function storeChunk(chunk) {
  content += chunk;
}
res.on('data',storeChunk);
// `once` registers a handler that runs a single time, when the response ends.
res.once('end',function() {
  // The response is complete: detach the 'data' handler.
  res.removeListener('data',storeChunk);
  // ...
});
zamnuts
  • 9,492
  • 3
  • 39
  • 46