You can use the bulkWrite API to carry out the updates based on the logic you specified, as it handles many write operations better than issuing them one at a time.
For example, the following snippet shows how to go about this, assuming you already have the data from the web service that you need to update the remote collection with:
// Connect, upsert the externally-fetched documents into the remote
// collection, and close the connection once the upsert helper calls back.
mongodb.connect(mongo_url, function(connectErr, conn) {
    // Without a connection there is nothing to do: report and stop.
    if (connectErr) {
        console.log(connectErr);
        return;
    }

    // `data` is the result of the HTTP call to the external service;
    // ideally this whole step is placed inside that service's callback.
    var remoteCollection = conn.collection("remote_collection_name");
    var closeConnection = function() {
        conn.close();
    };

    mongoUpsert(remoteCollection, data, closeConnection);
});
/**
 * Upserts every document of `data_array` into `collection` using a single
 * bulkWrite call.
 *
 * Each document becomes one `updateOne` operation that matches on `_id` and
 * on the stored `lastModified` being older than the incoming one, `$set`s
 * the whole document, and has `upsert` enabled.
 *
 * NOTE(review): because `upsert` is on, a document whose `_id` already exists
 * but whose stored `lastModified` is not older fails the filter, so the
 * server will presumably attempt an insert and report a duplicate-key
 * error — confirm and handle that error in `cb` if it is expected.
 *
 * @param {Object} collection - Object exposing `bulkWrite(ops, callback)`.
 * @param {Array<Object>} data_array - Documents to upsert; each one is read
 *     for `_id` and `lastModified`.
 * @param {Function} cb - Called as `cb(err, result)` once the write has
 *     finished. `err` is `false` on success (as before), otherwise the
 *     error reported by `bulkWrite`.
 */
function mongoUpsert(collection, data_array, cb) {
    // Nothing to write: skip the round trip instead of submitting an empty
    // operation list (which the driver is expected to reject).
    if (!data_array || data_array.length === 0) {
        return cb(false, null);
    }

    var ops = data_array.map(function(data) {
        return {
            "updateOne": {
                "filter": {
                    "_id": data._id, // or any other filtering mechanism to identify a doc
                    "lastModified": { "$lt": data.lastModified }
                },
                "update": { "$set": data },
                "upsert": true
            }
        };
    });

    collection.bulkWrite(ops, function(err, r) {
        // Signal completion only after the write has actually finished, so a
        // caller that closes the connection in `cb` does not cut it short.
        cb(err || false, r);
    });
}
If the data from the external service is huge, then consider sending the writes to the server in batches of 500. This gives you better performance because you are not sending every request to the server individually, just one request for every 500 operations.
For bulk operations MongoDB imposes a default internal limit of 1000 operations per batch (raised to 100,000 in MongoDB 3.6 and later), so the choice of 500 documents is good in the sense that you keep some control over the batch size rather than letting MongoDB impose the default, i.e. for larger operations in the magnitude of > 1000 documents. So in the first approach above one could just write the whole array at once, as it is small; the choice of 500 is for larger arrays.
// Batched variant: queue one upsert per document and send the queue to the
// server every BATCH_SIZE operations instead of in a single request.
// Expects `collection` (exposing bulkWrite) and `data_array` (the documents
// to upsert) to be in scope, e.g. as the parameters of mongoUpsert.
var BATCH_SIZE = 500;
var ops = [],
    counter = 0;

// Submits one batch of operations to the server.
var sendBatch = function(batch) {
    collection.bulkWrite(batch, function(err, r) {
        if (err) console.log(err);
        // do something with result
    });
};

data_array.forEach(function(data) {
    ops.push({
        "updateOne": {
            "filter": {
                "_id": data._id,
                "lastModified": { "$lt": data.lastModified }
            },
            "update": { "$set": data },
            "upsert": true
        }
    });
    counter++;

    // A full batch has accumulated: send it and start a fresh queue.
    if (counter % BATCH_SIZE === 0) {
        sendBatch(ops);
        ops = [];
    }
});

// Send whatever is left over when the total is not a multiple of BATCH_SIZE.
if (counter % BATCH_SIZE !== 0) {
    sendBatch(ops);
}