I would suggest using the other bulk API method, upsert(), which lets you retrieve the _id values of the inserted documents from the BulkWriteResult object by calling its getUpsertedIds() method. The result object has the same format as described in the documentation for BulkWriteResult.
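As a rough sketch of what that returns (the ObjectId values are placeholders), each entry pairs an operation's position in the batch with the _id it produced:

bulk.execute(function(err, result) {
    var upserted = result.getUpsertedIds();
    // e.g. [ { "index": 0, "_id": ObjectId("...") },
    //        { "index": 1, "_id": ObjectId("...") } ]
    console.log(upserted);
});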
An update operation with the Bulk.find().upsert() option performs an insert when there are no documents matching the Bulk.find() condition. If the update document does not specify an _id field, MongoDB adds one, which is why you can retrieve the _id values of the inserted documents from the BulkWriteResult.
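For example, a minimal sketch of a single such operation (the query and update documents here are made-up placeholders):

var bulk = collection.initializeUnorderedBulkOp();
// Inserts the document when nothing matches the find() condition,
// replaces the matching document otherwise
bulk.find({ "name": "Alice" }).upsert().updateOne({ "name": "Alice", "age": 30 });
bulk.execute(function(err, result) {
    console.log(result.nUpserted); // 1 on the first run, 0 thereafter
});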
Also, the way you are queueing up your bulk insert operations is not usually recommended since the whole queue builds up in memory; you want a bit more control over the queue and memory resources rather than relying on the driver's defaults of limiting batches to 1000 operations at a time and keeping the complete batch under 16MB. You can do this by iterating over your data array with a forEach() loop and a counter that caps each batch at 1000 operations.
The following demonstrates the above approach:
function getInsertedIds(result) {
    var ids = result.getUpsertedIds();
    console.log(ids); // an array of upserted ids
    return ids;
}
db.collection('myCollection', function(err, collection) {
    var bulk = collection.initializeUnorderedBulkOp(),
        insertedIds = [],
        counter = 0;

    dataArray.forEach(function(data) {
        bulk.find(data).upsert().updateOne(data);
        counter++;

        if (counter % 1000 == 0) {
            bulk.execute(function(err, result) {
                if (err) throw err;
                // accumulate rather than overwrite the ids from earlier batches
                insertedIds = insertedIds.concat(getInsertedIds(result));
            });
            // reset synchronously so the loop queues into a fresh batch
            // instead of the already-executed one
            bulk = collection.initializeUnorderedBulkOp();
        }
    });
    // Flush the remaining operations in the queue, i.e. when counter
    // is not an exact multiple of 1000
    if (counter % 1000 != 0) {
        bulk.execute(function(err, result) {
            if (err) throw err;
            insertedIds = insertedIds.concat(getInsertedIds(result));
            console.log(insertedIds);
        });
    }
});
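Note that initializeUnorderedBulkOp() allows MongoDB to execute the operations in any order and to continue past individual write errors; if the operations need to be applied in sequence, use initializeOrderedBulkOp() instead.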