10
// Requests http://www.nodejs.org/ and, for every chunk of the response body,
// logs the result of matching a title-tag pattern against that chunk alone.
var http = require('http');
var urlOpts = {host: 'www.nodejs.org', path: '/', port: '80'};
http.get(urlOpts, function (response) {
// 'data' is emitted once per received chunk, so this callback (and the log
// call inside it) can run several times for a single response.
response.on('data', function (chunk) {
var str=chunk.toString();
// NOTE(review): inside a string literal "\s" and "\>" are not recognised
// escapes, so the backslashes are dropped and RegExp receives a plain "s"
// (i.e. "<s*title...") rather than the whitespace class \s. Doubling the
// backslashes ("\\s") or using a regex literal would keep them.
var re = new RegExp("(<\s*title[^>]*>(.+?)<\s*/\s*title)\>", "g")
// With the "g" flag, match() returns an array of whole matches (capture
// groups are not included) or null when this chunk contains no match.
console.log(str.match(re));
});

});

Output

user@dev ~ $ node app.js [ 'node.js' ] null null

I only need to get the title.

caitriona
  • 8,569
  • 4
  • 32
  • 36
user1777212
  • 103
  • 1
  • 5

2 Answers

7

I would suggest using RegExp.prototype.exec instead of String.prototype.match, because exec always returns the capture groups. You can also define the regular expression using the literal syntax, and only once:

// Requests http://www.nodejs.org/ and prints the text of the <title> element
// for each response chunk that contains a complete title tag.
var http = require('http');
var urlOpts = {host: 'www.nodejs.org', path: '/', port: '80'};
// Compiled once and shared by every 'data' callback.
// Group 2 captures the text between the opening and closing title tags.
var re = /(<\s*title[^>]*>(.+?)<\s*\/\s*title)>/gi;
http.get(urlOpts, function (response) {
    response.on('data', function (chunk) {
        var str=chunk.toString();
        // A regex with the `g` flag remembers where its last match ended
        // (re.lastIndex) and exec() resumes from there. Each chunk is a new,
        // unrelated string, so rewind first; otherwise a match in an earlier
        // chunk makes this search start part-way into the current chunk.
        re.lastIndex = 0;
        var match = re.exec(str);
        // exec() returns null when the chunk holds no complete title tag,
        // e.g. when the tag is split across two chunks.
        if (match && match[2]) {
          console.log(match[2]);
        }
    });
});

The code also assumes that the title will be completely in one chunk, and not split between two chunks. It would probably be best to keep an aggregation of chunks, in case the title is split between chunks. You may also want to stop looking for the title once you've found it.

bdukes
  • 152,002
  • 23
  • 148
  • 175
  • @argonius has a good point in his example, that you should probably also be using the `i` flag, in addition to `g`, to make the regular expression case insensitive (since the casing of the `<title>` tag isn't guaranteed to be lowercase, especially if the document isn't XHTML). – bdukes Oct 26 '12 at 14:04
  • This won't work with SPA web pages where the title is set by JavaScript. For those you should use a headless browser such as Google Chrome. – Lukas Liesis Aug 05 '17 at 12:05
2

Try this:

// Case-insensitive pattern; group 1 captures the text between the title tags.
var re = new RegExp("<title>(.*?)</title>", "i");
// match() returns null when the string contains no title tag (most response
// chunks will not), so check the result before indexing into it to avoid
// "TypeError: Cannot read property '1' of null".
var match = str.match(re);
if (match) {
    console.log(match[1]);
}
gradosevic
  • 4,809
  • 2
  • 36
  • 51
  • `E:\Рабочий стол\dev\app.js:7 console.log(str.match(re)[1]); ^ TypeError: Cannot read property '1' of null at IncomingMessage.<anonymous> (E:\Рабочий стол\dev\app.js:7:26) at IncomingMessage.EventEmitter.emit (events.js:93:17) at IncomingMessage._emitData (http.js:359:10) at HTTPParser.parserOnBody [as onBody] (http.js:123:21) at Socket.socketOnData [as ondata] (http.js:1367:20) at TCP.onread (net.js:403:27)` – user1777212 Oct 26 '12 at 13:53
  • Worked for me, thanks! – Gene Bo Sep 23 '15 at 00:17