My goal is to insert VERY large csv's, so right not I use the csv streaming like so:
var myCollection = db.collection(myCollectionId);
var q = async.queue(Collection.insert.bind(myCollection), 10);
csv()
.from.path(myFilePath, {columns: true})
.transform(function(data, index, cb){
q.push(data, function (err, res) {
if (err) return cb(err);
cb(null, res[0]);
});
})
.on('end', function () {
q.drain = function() {
//do some stufff
};
})
.on('error', function (err) {
res.end(500, err.message);
console.log('on.error() executed');
});
});
But when files get REALLY large, like 70M+ and it's streaming them, my server is very slow and it takes forever, and when i try to load pages on the website its lethargic during this process.
Why is it not possible to execute a mongo insert using cron-job like this. I ask because the same insert takes maybe 30 seconds from the mongo command line.
P.S. Don't mind the readFile and lines part, I am doing that because I want to test for when all the lines have been inserted into the collection after the process is started (haven't implemented this yet).
var cronJob = require('cron').CronJob;
var spawn = require('child_process').spawn;
var fs = require('fs');
function MongoImportEdgeFile(dataID, filePath){
var scriptPath = "/local/main/db/mongodb-linux-x86_64-2.4.5/bin/mongoimport";
console.log("script path = "+scriptPath)
var output = "";
fs.readFile(filePath, 'utf-8',function(err, data) {
if (err){
console.log(err)
throw err;
}
//console.log('data = '+data);
var lines = data.split('\n');
console.log("total lines in file = " + lines);
var job = new cronJob(new Date(), function() {
// store reference to 'this', which is cronJob object. needed to stop job after script is done executing.
var context = this;
// execute R script asynchronously
var script = spawn(scriptPath, [" -d mydb -c Data_ForID_" + dataID + " --file " + filePath + " --type csv" ]);
console.log("Executing R script via node-cron: " + scriptPath);
// script has finished executing, so complete cron job and fire completion callback
script.on('close', function() {
console.log('inside script.on(close, function() for import');
context.stop();
});
}, function() {
// callback function that executes upon completion
console.log("Finished executing import");
}, true);
});
}