0

Noob here. I've got an HTTP request that pulls all of the content from a specific webpage. However, all I need is a specific string: "Most recent instantaneous value: ". In fact, I actually need to store the value that follows "value:". Here is my code:

var http = require("http");

// Destination of the request and the HTTP method used to send it.
var options = {
    host: 'waterdata.usgs.gov',
    port: 80,
    path: '/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400',
    method: 'POST'
};

// Prints one piece of the response body as it arrives.
function printChunk(chunk) {
    console.log('BODY: ' + chunk);
}

// Prints the status code and headers, then streams the body (decoded as
// UTF-8 text) to printChunk.
function onResponse(response) {
    console.log('STATUS: ' + response.statusCode);
    console.log('HEADERS: ' + JSON.stringify(response.headers));
    response.setEncoding('utf8');
    response.on('data', printChunk);
}

// Prints the message of an error raised by the request itself.
function onRequestError(err) {
    console.log('problem with request: ' + err.message);
}

var req = http.request(options, onResponse);
req.on('error', onRequestError);

// Send the same line twice as the request body, then finish the request.
['data\n', 'data\n'].forEach(function (line) {
    req.write(line);
});
req.end();

I realize I don't need all the console.log statements, but do I need to keep console.log('BODY: ' + chunk); so all of the data downloads?

mnort9
  • 1,810
  • 3
  • 30
  • 54

1 Answer

0

Never do it the way I'm doing it in this quick'n'dirty example. There are plenty of modules for DOM traversal, HTML/XML parsing, etc. They are a lot safer than a simple regex. But just so you get the general idea:

var http = require("http");

// Where to fetch the page from: server name, TCP port, and the path
// (including its query string) on that server.
var options = {};
options.host = 'waterdata.usgs.gov';
options.port = 80;
options.path = '/ga/nwis/uv?cb_72036=on&cb_00062=on&format=gif_default&period=1&site_no=02334400';

/**
 * Finds the first "Most recent instantaneous value: <value>" occurrence in a
 * piece of text and passes the value to a callback.
 *
 * The value is the run of non-whitespace characters that follows the label,
 * so it may be terminated by a space, a tab, a line break or the end of the
 * text. When body is empty/missing or the label is not present, cb is not
 * called.
 *
 * @param {string|Buffer} body - Text to search; non-strings are converted
 *     with String() before matching.
 * @param {function(string)} cb - Called once with the extracted value.
 */
function extract (body, cb) {
    if(!body)
        return;

    // \S+ ends the value at any whitespace and also matches at the very end
    // of the text; requiring a literal trailing space would miss a value that
    // is followed by a line break or by nothing at all.
    var matches=String(body).match(/Most recent instantaneous value:\s+(\S+)/);
    if(matches)
        cb(matches[1]);
}

// Fetch the page and print the extracted value once the whole body has
// arrived.
http.get(options, function(res) {
    // Chunk boundaries are arbitrary, so the text being searched for can be
    // split across two 'data' events. Collect everything first and search the
    // complete body a single time.
    var body = '';
    res.setEncoding('utf8');
    res.on('data', function (chunk) {
        body += chunk;
    });
    res.on('end', function () {
        extract(body, function(v){ console.log(v); });
    });
}).on('error', function(e) {
    console.log('problem with request: ' + e.message);
});

Somehow I also got a different page when sending a POST instead of a GET request. So I changed that bit...

Regarding your second question: No, you don't need to keep any of the console.log() statements. Just use callbacks and everything is fine! :-)

Sebastian Stumpf
  • 2,761
  • 1
  • 26
  • 34
  • Thank you. Why do you consider this method of doing it unsafe? Also, this solution is printing both instances of the string. I only need the first instance. Do I need a modifier or something? – mnort9 Apr 04 '12 at 17:46
  • Figured out why it was printing the data twice. – mnort9 Apr 04 '12 at 21:51
  • 1
    Just add a return in front of the callback if you only want the first value. On the topic of parsing HTML I recommend reading [Jeff's opinion](http://www.codinghorror.com/blog/2009/11/parsing-html-the-cthulhu-way.html) about it and of course this answer: http://stackoverflow.com/a/1732454/479133 – Sebastian Stumpf Apr 04 '12 at 23:51