0

I use http.request to fetch a webpage; this is my code:

/**
 * Fetches a page with `http.request` and passes the response body, decoded
 * as a UTF-8 string, to `callback`.
 *
 * 301/302 responses are followed through their `Location` header. The
 * callback is invoked exactly once:
 *   - `callback(body)` when the complete response was received;
 *   - `callback(partialBody, err)` when the response was cut short;
 *   - `callback(null, err)` when the request itself failed or a redirect
 *     could not be followed.
 *
 * @param {Object|string} option - Passed unchanged to `http.request`.
 * @param {Function} callback - Receives `(body, err)`; `err` is only passed
 *     on failure, so callers that read just the first argument keep working.
 * @param {number} [redirectsLeft=10] - How many more redirects may be
 *     followed before giving up.
 */
var curlUtil=function(option,callback,redirectsLeft){
    var remaining=typeof redirectsLeft==='number'?redirectsLeft:10;
    var chunks=[];
    var finished=false;

    // Several events can signal the end of one response ('aborted', 'error'
    // and 'end'), so make sure the caller only ever hears about the first.
    var finish=function(body,err){
        if(finished){
            return;
        }
        finished=true;
        if(err){
            callback(body,err);
        }else{
            callback(body);
        }
    };
    var received=function(){
        return Buffer.concat(chunks).toString();
    };

    console.log(option);
    var req=http.request(option,function(res){
            if(res.statusCode==301||res.statusCode==302){
              var link=res.headers.location;
              console.log('redirect:'+link);
              // Discard the redirect body so the socket is released.
              res.resume();
              if(!link){
                finish(null,new Error('redirect without a Location header'));
                return;
              }
              if(remaining<=0){
                finish(null,new Error('too many redirects'));
                return;
              }
              curlUtil(link,finish,remaining-1);
              return;
            }
            res.on('data',function(chunk){
                    chunks.push(chunk);
            });
            res.on('aborted',function(){
                    finish(received(),new Error('response aborted before it was complete'));
            });
            res.on('error',function(err){
                    finish(received(),err);
            });
            res.on('end',function(){
                    // 'end' alone does not prove the whole message arrived;
                    // `complete` is false when the connection closed early.
                    if(res.complete===false){
                      finish(received(),new Error('response ended before it was complete'));
                      return;
                    }
                    finish(received());
            });
    });
    // Without a listener a failed request throws an uncaught 'error' event.
    req.on('error',function(err){
            finish(null,err);
    });
    // Required: signals that the request has no (more) body to send.
    req.end();
};

Sometimes it works fine, but sometimes it returns broken data: the res.on('end') handler is called while there is still data waiting to be received. I don't know how this happens, but it does. Can anyone tell me what I should do?

Arnold
  • 210
  • 1
  • 6
  • 13
  • What do you mean by *broken data*? Can you show us an example of the `option` object? – Paul Mougel Dec 24 '13 at 13:17
  • Plz make sure this is not a timeout case as explained here: http://stackoverflow.com/questions/6214902/how-to-set-a-timeout-on-a-http-request-in-node – Nitin... Dec 24 '13 at 13:25
  • I request this page: http://s.taosem.com/detail?ww=2011%E7%BE%8E%E7%9A%84%E5%88%AB%E8%87%B4 Sometimes I only get part of the HTML. – Arnold Dec 24 '13 at 13:28

2 Answers

0

To identify a timeout try putting something like this:

// Log when the request's socket reports a timeout, then abort the request.
req.on('socket', function (socket) {
  socket.on('timeout', function() {
    console.log('Timed out');
    // The 'timeout' event is only a notification; abort explicitly.
    req.abort();
  });
});

If it confirms this is timeout, you can increase this value:

// Raise the socket's idle timeout to 60 seconds and abort the request if it
// is still exceeded.
req.on('socket', function (socket) {
  socket.setTimeout(60000);  
  socket.on('timeout', function() {
    // The 'timeout' event is only a notification; abort explicitly.
    req.abort();
  });
});
Nitin...
  • 1,274
  • 10
  • 18
-1
/**
 * Fetches a page with `http.request` and passes the response body, decoded
 * as a UTF-8 string, to `callback`.
 *
 * 301/302 responses are followed through their `Location` header. The
 * callback is invoked exactly once:
 *   - `callback(body)` when the complete response was received;
 *   - `callback(partialBody, err)` when the response was cut short;
 *   - `callback(null, err)` when the request itself failed or a redirect
 *     could not be followed.
 *
 * @param {Object|string} option - Passed unchanged to `http.request`.
 * @param {Function} callback - Receives `(body, err)`; `err` is only passed
 *     on failure, so callers that read just the first argument keep working.
 * @param {number} [redirectsLeft=10] - How many more redirects may be
 *     followed before giving up.
 */
var curlUtil=function(option,callback,redirectsLeft){
    var remaining=typeof redirectsLeft==='number'?redirectsLeft:10;
    var chunks=[];
    var finished=false;

    // Several events can signal the end of one response ('aborted', 'error'
    // and 'end'), so make sure the caller only ever hears about the first.
    var finish=function(body,err){
        if(finished){
            return;
        }
        finished=true;
        if(err){
            callback(body,err);
        }else{
            callback(body);
        }
    };
    var received=function(){
        return Buffer.concat(chunks).toString();
    };

    console.log(option);
    var req=http.request(option,function(res){
            if(res.statusCode==301||res.statusCode==302){
              var link=res.headers.location;
              console.log('redirect:'+link);
              // Discard the redirect body so the socket is released.
              res.resume();
              if(!link){
                finish(null,new Error('redirect without a Location header'));
                return;
              }
              if(remaining<=0){
                finish(null,new Error('too many redirects'));
                return;
              }
              curlUtil(link,finish,remaining-1);
              return;
            }
            res.on('data',function(chunk){
                    chunks.push(chunk);
            });
            res.on('aborted',function(){
                    finish(received(),new Error('response aborted before it was complete'));
            });
            res.on('error',function(err){
                    finish(received(),err);
            });
            res.on('end',function(){
                    // 'end' alone does not prove the whole message arrived;
                    // `complete` is false when the connection closed early.
                    if(res.complete===false){
                      finish(received(),new Error('response ended before it was complete'));
                      return;
                    }
                    finish(received());
            });
    });
    // Without a listener a failed request throws an uncaught 'error' event.
    req.on('error',function(err){
            finish(null,err);
    });
    // Required: http.request() only finishes sending once end() is called,
    // even when there is no request body. Omitting it leaves the request open.
    req.end();
};

req.end() will trigger the 'end' event, even though data is still being transferred.

Nelson.li
  • 521
  • 4
  • 6
  • I read the Node.js API docs (http://nodejs.org/docs/latest/api/http.html#http_http_request_options_callback); there is a note in the example: `Note that in the example req.end() was called. With http.request() one must always call req.end() to signify that you're done with the request - even if there is no data being written to the request body.` I have to call the .end() method, otherwise it will keep requesting forever. – Arnold Dec 24 '13 at 13:11