0

The following code, which inserts 200,000 records into PostgreSQL server from node.js, is taking about 17 minutes on my laptop PC, which feels horribly slow.

var pg = require('pg');
var Client = pg.Client;
var async = require('async');

var client = new Client(connectionString);
client.connect();

var rollback = function(client) {
  client.query('ROLLBACK', function() {
    client.end();
    process.kill();
  });
};

client.query('BEGIN',function(err,result){
  if(err){ console.error(err); rollback(client);};
  async.waterfall([
    function(cb){
      client.query('DROP INDEX idx',function(err,result){
        client.query('TRUNCATE TABLE tbl',function(err,result){
          async.forEach(values,function(value,valueNext){
            client.query('INSERT INTO tbl ('
                         + 'col1,'
                         + 'col2) VALUES ($1,$2)',[
                           value,
                           generatedSomething(value)
                         ],function(err){
                           valueNext();
                         });
          },function(err,result){
            if(err){ console.error(err); rollback(client);cb(false);return;};
            client.query('CREATE INDEX idx ON tbl',function(err,result){
              cb(null);
            });
          });
        });
      });
    });
  },
],function(err){
  client.query('COMMIT', client.end.bind(client));
});

There are some strategies I've applied to speed up.

  • Drop all indices before insertion, create it after all insertion is done ... ok
  • Use TRUNCATE TABLE instead of DELETE FROM ... ok
  • Use COPY FROM instead of INSERT INTO ... not done

It seems that using COPY FROM instead of INSERT INTO will make effect, but it's used for imported CSV files, not for script-generated values.

So, does it mean that exporting script-generated values to temporary CSV file, and importing values using COPY FROM, is the most effictive way to insert values into PostgreSQL quickly?

Izumi Kawashima
  • 1,197
  • 11
  • 25

1 Answers1

1

copyFrom will return a WritableStream that you can append your values to as CSV, like:

var stream = client.copyFrom("COPY tbl (col1, col2) FROM STDIN WITH CSV");
stream.on('close', function() {
  client.query("COMMIT");
});
stream.on('error', rollback);
async.forEach(values, function(value, valueNext) {
  stream.write(value + "," + generatedSomething(value) + "\n");
});
stream.end();

Of course you will need to properly escape your values

Vlad
  • 10,602
  • 2
  • 36
  • 38