While requesting HTTP responses with Node.js and importing them into MongoDB, I noticed that one or two URLs return headers whose names contain illegal characters (illegal because the header names are being used as keys), which crashes the entire script when I try to import them into MongoDB. An example is below:
{
"url": "divensurf.com",
"statusCode": 200,
"headers": {
"x-varnish": "2236710953 2236710300",
"vary": "Accept-Encoding,Cookie,X-UA-Device",
"cache-control": "max-age=7200, must-revalidate",
"x-cache": "V1HIT 2",
"content-type": "text/html; charset=UTF-8",
"page.ly": "v4.0",
"x-pingback": "http://divensurf.com/xmlrpc.php",
"date": "Thu, 21 Mar 2013 19:40:59 GMT",
"transfer-encoding": "chunked",
"via": "1.1 varnish",
"connection": "keep-alive",
"last-modified": "Thu, 21 Mar 2013 19:40:57 GMT",
"age": "2"
}
}
The header/key "page.ly" would crash the script, since it contains the illegal character `.`. Are there any ways to sanitize this key/header, which is enclosed in quotes, by removing these illegal characters before I import this document into MongoDB?
Below is the code in which I request responses:
// Request one host and store its URL, status code and response headers.
// The outer `i` is passed in as a parameter so both callbacks below read
// the host name through that captured index.
(function (index) {
  // Builds the record for this response and saves it to the collection.
  function storeResponse(response) {
    var record = {
      url: hostNames[index],
      statusCode: response.statusCode,
      // Header names are stored verbatim as the keys of this sub-document.
      headers: response.headers
    };
    db.scrape.save(record); // imports headers into MongoDB
  }

  // Logs the failing host name followed by the error's stack trace.
  function reportFailure(err) {
    console.log("Error: " + hostNames[index] + "\n" + err.stack); // prints error stack onto console
  }

  http.get(options, storeResponse).on('error', reportFailure);
})(i);
For example, the key would change from "page.ly" to "pagely".
EDIT: SOLVED. Check Gael's answer.