MongoClient.connect(url, function(err, db) {
var batch = db.collection("chatmessage").initializeOrderedBulkOp();
//var batch = db.collection("chatmessage").db.collection.initializeUnorderedBulkOp()
var messageNum=0;
var chatmessage = null;
var count =0;
for (var i = 0;i<300;i++){
messageNum = getMessage();//value from 1~500000;
for(var j = 0;j<messageNum;j++){
count++;
chatmessage = generateChatMessage();
batch.insert(chatmessage);
if(count>=1000){
count=0;
batch.execute(function(err){
console.log(err);
batch = db.collection("chatmessage").initializeOrderedBulkOp();
console.log("batch execute"+util.inspect(process.memoryUsage()));
//db.close();
});
}
}
console.log("execute one chatroom"+util.inspect(process.memoryUsage()));
}
if(count>0){
batch.execute(function(err){
console.log(err);
batch = db.collection("chatmessage").initializeOrderedBulkOp();
});
}
}
need to populate millions messages into mongo with nodejs.using Bulk method to do the insert batch.
but there are some questions about the code
the bulk execute method run async. when insert data=100,000.have not seen any bulk execute finished,until all the code finish executing,can see the "batch execute" was printed.
when the variable of messageNum is large,about 50,000.it will out of memory.
FATAL ERROR: CALL_AND_RETRY_LAST Allocation failed - process out of memory
the variables are all defined external of the loop.and have run batch.execute. don't understand why this happended.
- when the record is large,the rss is rapidly increase,and never decreased.as it is not managed by V8 engine.it will increase until reaching my computer's memory size.
this is related with DB,when I remove the DB operation,there is no problem. I guess the batch.execute() methond take this memory.but could't release it even with db.close();
{ rss: 1449750528, heapTotal: 1091999056, heapUsed: 922237384 }
-------------------------------------------UPDATE1------------------------------
Have got serveral heapdump snapshoot files with heapdump package.
the root cause is batch.execute method is async called.it never execute until all code are excuted,as i mentioned at my first question.【also doubt that even the batch.execute() is async executed.it should run indepently,not influenced by main process.but i have not found them writen into db,and the info log in callback method have not been printed】 so all documents that need to be inserted into mongo stay in the memory.and cause the issue.
@joeytwiddle have found that you have a common opionion on this problem. from this bulk-upsert-in-mongodb
have not found the bulk.execute() method can be configured sync to execute. anyone have any idea to solve the problem.