0

I have a very large file that has a ton of JSON Strings(over 100K), with one string on each line.

I want to read each line, insert it into a database, and after item has been inserted, I want to update another document in another database with basic information from the initial insert. And since I am a nodejs newb, I am having trouble wrapping my head around what I am doing wrong. Here is what I have so far.

var lineReader - require("line-reader");

lineReader.eachLine(filePath, function(line, last){
    if(count == 1){
        asyncAdd(JSON.parse(line));
    }
})}

var counter = 0;

function asyncAdd(jsonString){

async.waterfall([
        //this function calls the inserter
    function(callback){
        counter++;

        addJson(jsonString, function(doc){
            callback(null, doc);
            console.log("Added " + counter);
        })

    },
    //This function calls the indexing function
    function(doc, callback){

        console.log("indexing: " + counter);

        updateDBIndex(doc, function(err, savedDocument){
            callback(err, savedDocument);
        });
    }
    ],

    function(err, results){
        if(err){
            return console.error("Error " + err);
        }
        console.log("indexed " + counter);
    });
     }

Basically, if my file looks like:

{"_id": "1", "item":"glove", "color": "red"}\n
{"_id": "4", "item":"hat", "color" : "red"}\n
{"_id": "6", "item":"hat","color" : "blue"}\n

I want the output to look like, added 1 indexing 1 indexed 1 added 2 indexing 2 indexed 2 added 3 indexing 3 indexed 3

Any help will be more than appreciated! Thank you!

James B
  • 15
  • 5
  • what do you mean "database index" ? what does the snippet above output? – Gntem Mar 31 '14 at 19:31
  • Database index is something that I am using to keep track of everything that i have inserted. So in my small file I have above the index would be {"hat" : "2" "glove":"1" "red":"2" "blue":"1". – James B Mar 31 '14 at 19:33
  • Are you writing your own database for this or is one of us misunderstanding how database software works? – barry-johnson Mar 31 '14 at 19:33
  • @JamesB so 'database index' is referred to line number? – Gntem Mar 31 '14 at 19:35
  • All of my items are added to the database correctly, but the asynchronous nature is making the indexing all wrong. I am editing to clarify what I mean. – James B Mar 31 '14 at 19:36
  • I posted a solution to a similar question for parsing a very large file, using a stream, synchronous. see: http://stackoverflow.com/questions/16010915/parsing-huge-logfiles-in-node-js-read-in-line-by-line/23695940#23695940 – Gerard May 16 '14 at 13:24

1 Answers1

0

you could try to following snippet

var lineReader = require("line-reader");
var lineNumber = 0;
lineReader.eachLine(filePath, function (line, last) {
  asyncAdd(JSON.parse(line), lineNumber); // current line number
  lineNumber++; // increment for next one
});
function asyncAdd(jsonString, lineNum/*additional parameter*/) {
  async.waterfall([
      //this function calls the inserter
      function (callback) {
        addJson(jsonString, function (doc) {
          callback(null, doc);
          console.log("Added " + lineNum);
        })
      },
      //This function calls the indexing function
      function (doc, callback) {
        console.log("indexing: " + lineNum);
        updateDBIndex(doc, function (err, savedDocument) {
          callback(err, savedDocument);
        });
      }
    ],
    function (err, results) {
    if (err) {
      return console.error("Error " + err);
    }
    console.log("indexed " + lineNum);
  });
}

hope it works, the original was kind of incomplete.

Gntem
  • 6,949
  • 2
  • 35
  • 48
  • Thanks, for the response, I just don't have a way of reading the lines -line-by-line. i want to pause reading the lines until I have finished updating the database. – James B Apr 01 '14 at 02:56