1

What I'm trying to do, is read in a .txt file of words, slit them at newlines, and then for each word in the constructed array, perform operations on the word (that matches the Word Schema I'm using) to determine the letter count for each word like for "word (0 A's, 0 B's, 1 W, 1 O, 1 R, 1 D, O Z's etc...), and then insert each Word into the database.

Here is the mongoose schema for a Word "shape" for database entries (models/words.js)

var restful = require('node-restful');
var mongoose = restful.mongoose;

// MongoDB Schema
var wordSchema = new mongoose.Schema({
    code: String,
    word: String,
    lettersCount: {
        'a': Number,
        'b': Number,
        'c': Number,
        'd': Number,
        'e': Number,
        'f': Number,
        'g': Number,
        'h': Number,
        'i': Number,
        'j': Number,
        'k': Number,
        'l': Number,
        'm': Number,
        'n': Number,
        'o': Number,
        'p': Number,
        'q': Number,
        'r': Number,
        's': Number,
        't': Number,
        'u': Number,
        'v': Number,
        'w': Number,
        'x': Number,
        'y': Number,
        'z': Number
    }
});

// Return model
module.exports = restful.model(
    'Words',
    wordSchema
);

Now, my data is in the file dictionaries/words.txt.

In the main file called server.js, I'm calling this function:

populateDictionary();

The tasks/populateDictionary.js file has the following function to do the database entries:

var populateDictionary = function(dict) {
    Word.remove().exec();
    fs.readFileAsync('dictionaries/words.txt', 'utf8').then(function(data, err) {
        if (err) throw err;
        var dictionary = data.split('\n');
        for (var i = 0; i < dictionary.length; i++) {
            var entry = new Word({
                word: dictionary[i],
                lettersCount: {
                    'a': 0, 'b': 0, 'c': 0, 'd': 0,
                    'e': 0, 'f': 0, 'g': 0, 'h': 0,
                    'i': 0, 'j': 0, 'k': 0, 'l': 0,
                    'm': 0, 'n': 0, 'o': 0, 'p': 0,
                    'q': 0, 'r': 0, 's': 0, 't': 0,
                    'u': 0, 'v': 0, 'w': 0, 'x': 0,
                    'y': 0, 'z': 0
                }
            });
            for (var j = 0; j < entry.word.length; j++) {
                entry.lettersCount[entry.word[j]]++;
            }
            console.log(entry);
            entry.save();
        }
    });
};

So, I'm fairly new to databases, but think there's a good solution out there to this, just not sure what... I'm basically making a huge call stack, and it's crashing my computer. I'm looking for the right way to do this kind of thing. Thanks!

3 Answers3

2

I'd suggest async library. It has many useful methods. I've used async.eachLimit below which limits the async operations to the number provided.

clearDictionary(function(err){
    if(err){
        throw err;
    }
    else{
        populateDictionary();
    }
})

As remove is also an io call, so it should wait before the operation ends before moving to the next part. That's why wrapped in clearDictionary above. The definitions are:

var async = require("async");

var clearDictionary = funtion(done) {
    Word.remove().exec(function(err){
        if(err){
            done(err);
        }
        else{
            done();
        }
    });
}


var populateDictionary = function() {
    fs.readFileAsync('dictionaries/words.txt', 'utf8').then(function(data, err) {
        if (err) throw err;
        var dictionary = data.split('\n');
        async.eachLimit(dictionary, 20, funtion(word, callback){
            var entry = new Word({
                word: word,
                lettersCount: getLetterCountObj()
            });
            countLetters(entry);
            entry.save(function(err){
                if(err){
                    return callback(err);
                }
                else{
                    return callback();
                }
            });
        }, function(err){
            if(err){
                throw err
            }
            else{
                console.log("Dictionary populated!");
            }
        })
    });
};



var getLetterCountObj = function(){
    return {
        'a': 0, 'b': 0, 'c': 0, 'd': 0,
        'e': 0, 'f': 0, 'g': 0, 'h': 0,
        'i': 0, 'j': 0, 'k': 0, 'l': 0,
        'm': 0, 'n': 0, 'o': 0, 'p': 0,
        'q': 0, 'r': 0, 's': 0, 't': 0,
        'u': 0, 'v': 0, 'w': 0, 'x': 0,
        'y': 0, 'z': 0
    }
}


var countLetters = function (entry){
    for (var j = 0; j < entry.word.length; j++) {
        entry.lettersCount[entry.word[j]]++;
    }
}
Talha Awan
  • 4,573
  • 4
  • 25
  • 40
0

I'm not very familiar with the exact technologies you are using, but from a structure/ logic flow standpoint maybe this will help:

I think your issue might be that you parse the entire file into memory before processing: that's a lot to hold on to when in reality you just want to process one word at a time. Some quick Googling lead me to this article which makes it seem like you could read a line from your file, count it, insert your shape and then move on to the next word which should prevent you eating a ton of memory.

Kevin Seymour
  • 766
  • 9
  • 25
0

You can sequentially execute mixture of IO calls and logic using nsynjs. Here are the steps how code needs to be transformed:

Step 1. Wrap slow functions with callbacks into nsynjs-aware wrappers:

dbWrappers.js:

// wrapper for remove
exports.remove = function (ctx, collection) {
     collection.remove().exec(function(err){
        ctx.resume(err);
     });
};
exports.remove.nsynjsHasCallback = true;

// wrapper for save
exports.save = function (ctx, entry) {
     entry.save(function(err){
        ctx.resume(err);
     });
};
exports.save.nsynjsHasCallback = true;

For readFileAsync you can use this wrapper: https://github.com/amaksr/nsynjs/blob/master/wrappers/nodeFsReadFile.js

Step 2. Write your logic as if it was synchronous, and put it into function:

var populateDictionary = function(Word, dbWrappers, readFile) {
    dbWrappers.remove(nsynjsCtx,dict); // use wrapper .remove from above,
                    // nsynjs will wait until callback in the wrapper complete

    var data = readFile(nsynjsCtx, 'path').data; // use wrapper for fs.readFile

    var dictionary = data.split('\n');

    for (var i = 0; i < dictionary.length; i++) {
        var entry = new Word({
            word: dictionary[i],
            lettersCount: {
                'a': 0, 'b': 0, 'c': 0, 'd': 0,
                'e': 0, 'f': 0, 'g': 0, 'h': 0,
                'i': 0, 'j': 0, 'k': 0, 'l': 0,
                'm': 0, 'n': 0, 'o': 0, 'p': 0,
                'q': 0, 'r': 0, 's': 0, 't': 0,
                'u': 0, 'v': 0, 'w': 0, 'x': 0,
                'y': 0, 'z': 0
            }
        });
        for (var j = 0; j < entry.word.length; j++) {
            entry.lettersCount[entry.word[j]]++;
        }
        console.log(entry);
        dbWrappers.save(nsynjsCtx,entry); // use wrapper '.save' from step 1
    }
};

Step 3. Run that function in synchronous manner via naynjs:

var dbWrappers = require('dbWrappers');
var readFile = require('nodeFsReadFile').readFile;

var populateDictionary = function(Word, dbWrappers, readFile) {
    ....
}

nsynjs.run(populateDictionary,{},Word, dbWrappers, readFile, function(){
    console.log('loading done');
})

See similar example https://github.com/amaksr/nsynjs/tree/master/examples/node-mysql (it inserts any number records into MySQL).

amaksr
  • 7,555
  • 2
  • 16
  • 17