The following code, which inserts 200,000 records into a PostgreSQL server from node.js, takes about 17 minutes on my laptop, which feels horribly slow.
```js
var pg = require('pg');
var Client = pg.Client;
var async = require('async');

// connectionString, values and generatedSomething() are defined elsewhere
var client = new Client(connectionString);
client.connect();

var rollback = function (client) {
  client.query('ROLLBACK', function () {
    client.end();
    process.exit(1); // process.kill() needs a pid; just exit the process
  });
};

client.query('BEGIN', function (err, result) {
  if (err) { console.error(err); rollback(client); return; }
  async.waterfall([
    function (cb) {
      // drop the index and empty the table before the bulk insert
      client.query('DROP INDEX idx', function (err, result) {
        client.query('TRUNCATE TABLE tbl', function (err, result) {
          // one INSERT statement per generated value
          async.forEach(values, function (value, valueNext) {
            client.query('INSERT INTO tbl ('
              + 'col1,'
              + 'col2) VALUES ($1,$2)', [
              value,
              generatedSomething(value)
            ], function (err) {
              valueNext(err); // propagate insert errors
            });
          }, function (err) {
            if (err) { console.error(err); rollback(client); cb(err); return; }
            // recreate the index only after all rows are inserted
            client.query('CREATE INDEX idx ON tbl (col1)', function (err, result) {
              cb(null);
            });
          });
        });
      });
    },
  ], function (err) {
    if (err) { return; } // already rolled back above
    client.query('COMMIT', client.end.bind(client));
  });
});
```
Here are the strategies I've considered to speed this up:
- Drop all indices before insertion and recreate them after all inserts are done ... ok
- Use `TRUNCATE TABLE` instead of `DELETE FROM` ... ok
- Use `COPY FROM` instead of `INSERT INTO` ... not done
It seems that using `COPY FROM` instead of `INSERT INTO` would make a difference, but it appears to be meant for importing CSV files, not for script-generated values.

So, does that mean that exporting the script-generated values to a temporary CSV file and importing them with `COPY FROM` is the most effective way to insert values into PostgreSQL quickly?
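To make the question concrete, here is a rough sketch of what I have in mind (the temp-file path, the lack of CSV escaping, and the `col1`/`col2` column names are just placeholders taken from the simplified code above):

```js
var fs = require('fs');
var os = require('os');
var path = require('path');

// Dump the generated rows to a temporary CSV file.
// NOTE: no CSV escaping here; this assumes the values contain
// no commas, quotes or newlines.
var csvPath = path.join(os.tmpdir(), 'tbl_import.csv');
var lines = values.map(function (value) {
  return value + ',' + generatedSomething(value);
});
fs.writeFileSync(csvPath, lines.join('\n'));

// COPY ... FROM 'file' is read by the server process, so the file must be
// accessible on the database server (and normally requires superuser rights).
client.query(
  "COPY tbl (col1, col2) FROM '" + csvPath + "' WITH (FORMAT csv)",
  function (err, result) {
    if (err) { console.error(err); rollback(client); return; }
    // then CREATE INDEX and COMMIT as before
  }
);
```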