44

I need to parse a file line by line in the following format with Node.js:

13
13
0 5
4 3
0 1
9 12
6 4
5 4
0 2
11 12
9 10
0 6
7 8
9 11
5 3

It represents a graph. The first two lines are the number of vertexes and the number of edges, followed by the edges.

I can accomplish the task with something like:

// Streams the graph file line by line (no full-file buffering).
var fs = require('fs');
var readline = require('readline');
var read_stream = fs.createReadStream(filename);
var rl = readline.createInterface({
    input: read_stream
});
var c = 0;
var vertexes_number;
var edges_number;
var edges = [];
rl.on('line', function(line){
    if (c==0) {
        // First line: vertex count. Always pass a radix to parseInt.
        vertexes_number = parseInt(line, 10);
    } else if (c==1) {
        // Second line: edge count.
        edges_number = parseInt(line, 10);
    } else {
        // Every remaining line is one edge: "from to".
        edges.push(line.split(' '));
    }
    c++;
})
.on('close', function(){
    // readline.Interface emits 'close', not 'end' — an 'end' listener
    // would never fire. There is also no need to call rl.close() here:
    // by the time 'close' fires the interface is already shut down.
});

I understand this kind of thing might not be what Node.js was designed for, but the cascaded if in the line callback does not really look elegant / readable to me.

Is there a way to read synchronously lines from a stream like in every other programming language?

I'm open to use plugins if there is not a built-in solution.

[EDIT]

Sorry, I should have made clearer that I would like to avoid loading the whole file in memory beforehand

Andrea Casaccia
  • 4,802
  • 4
  • 29
  • 54
  • https://nodejs.org/api/fs.html#fs_fs_readfilesync_file_options – Alexey Ten Dec 11 '15 at 12:07
  • yes load it with `fs.readFileSync` and then parse it with your code synchronously after splitting by newlines i.e `lines = fs.readFileSync('graph.txt').split(/[\n\r]/);` – Nikos M. Dec 11 '15 at 12:08

4 Answers

82

My usual code part for such simple tasks:

// Read the whole file as UTF-8, break it into lines, and drop the empty ones.
var fileText = require('fs').readFileSync(filename, 'utf-8');
var lines = fileText.split('\n').filter(function (line) {
    return line.length > 0;
});

lines is an array of strings without empty ones.

Alexey Ten
  • 13,794
  • 6
  • 44
  • 54
  • 22
    Thanks. However I'm still interested in lean solutions that don't load the whole file in memory. – Andrea Casaccia Dec 11 '15 at 12:48
  • 10
    This way can not deal with big file, such as 10000000 lines log file, because you can load big file to memory with buffer, but nodejs toString method can not deal with too big buffer object. – Sugar Nov 16 '17 at 02:51
  • 2
    It works pretty good if you have a billion txt log files each a few thousand lines long. Where each txt file is a single test execution log and you want to parse them all for relevant data and dump that data into a JSON log format for the purpose of migrating old test data into a new Kabana dashboard using an AWS ECS cluster. ;-) – Seth Eden Dec 11 '18 at 18:52
  • 2
    dont forget `const {EOL} = require('os');` for MultiOS instead of '\n' – Ryu S. Feb 13 '22 at 02:42
  • I really really appreciate this pinpointing answer. For just in case someone encounters "TypeError: aaa.split is not a function", using "readFileSync.toString()" can help. – Rich KS Aug 01 '22 at 12:20
  • @RichKS second parameter of `readFileSync` ensures that it must return a string so it'll have `.split` method. – Alexey Ten Aug 01 '22 at 12:43
19

This project on github.com does exactly what I needed:

https://github.com/nacholibre/node-readlines

// n-readlines yields one line at a time, synchronously, without
// buffering the whole file in memory. next() returns a Buffer, or
// false once the file is exhausted.
var readlines = require('n-readlines');
var liner = new readlines(filename);

// Header: first line is the vertex count, second the edge count.
var vertexes_number = parseInt(liner.next().toString('ascii'));
var edges_number = parseInt(liner.next().toString('ascii'));

// Every remaining line is an edge: "from to".
var edges = [];
var line;
while ((line = liner.next()) !== false) {
    edges.push(line.toString('ascii').split(' '));
}
Andrea Casaccia
  • 4,802
  • 4
  • 29
  • 54
  • is there a good way to do this with stdin? I am using a code submission website and cannot read /dev/stdin as a file either. – Colin D Apr 26 '17 at 15:01
  • You would have better chances of getting an answer if you posted a new question with more details rather than commenting here. – Andrea Casaccia Apr 27 '17 at 13:47
  • Already done http://stackoverflow.com/questions/43638105/how-to-get-synchronous-readline-or-simulate-it-using-async-in-nodejs – Colin D Apr 27 '17 at 13:50
3

Personally, I like to use event-stream to deal with streams. It's not necessary here, but I used it for the code sample. It's simple: I parse to int and put everything inside edges; then, when the file reading is done, I take the first element, which is vertexes_number, and the new first element is edges_number.

// event-stream pipeline: split the input into lines, push every line
// (including the two header lines) into `edges`, then peel the counts
// off the front once the whole stream has been consumed.
var fs = require('fs');
var es = require('event-stream');

var filename = 'parse-file.txt';

var vertexes_number, edges_number;
var edges = [];

fs.createReadStream(filename)
    .pipe(es.split()) // split by lines
    .pipe(es.map(function (line, next) {
        // split and convert all to numbers
        // NOTE(review): a blank line (e.g. a trailing newline) would push
        // [NaN] here — assumes the input contains no empty lines; verify.
        edges.push(line.split(' ').map((n) => +n));

        next(null, line);
    })).pipe(es.wait(function (err, body) {
        // es.wait fires only after the stream ends, so `edges` is complete.
        // the first element is an array containing vertexes_number
        vertexes_number = edges.shift().pop();

        // the following element is an array containing edges_number
        edges_number = edges.shift().pop();

        console.log('done');
        console.log('vertexes_number: ' + vertexes_number);
        console.log('edges_number: ' + edges_number);
        console.log('edges: ' + JSON.stringify(edges, null, 3));
    }));
Shanoor
  • 13,344
  • 2
  • 29
  • 40
2

Why not read them all into an array and then take out the first two elements with splice? I assume that your example is much simplified, or else you would just read the whole file into memory and split it. If your actual case stores multiple graphs and you want to do something when each one is loaded, for instance, you can put a test in your line event.

var fs = require('fs');
var readline = require('readline');
var read_stream = fs.createReadStream(filename);
var rl = readline.createInterface({
    input: read_stream
});

// Accumulates one ['from', 'to'] pair per line until a whole graph is buffered.
var buffer = [];

rl.on('line', function(line){
    buffer.push(line.split(' '));
    //Not sure what your actual requirement is but if you want to do
    //something like display a graph once one has loaded
    //obviously need to be able to determine when one has completed loading
    if ( buffer.length == GRAPHLENGTH) {  //or some other test
        displayGraph(buffer);
        buffer = [];
    }
})
.on('close', function(){
    //or do it here if there is only one graph
    //displayGraph(buffer);
    // No rl.close() here: 'close' means the interface has already shut
    // down, so closing it again would be redundant.
});

// Consumes the buffered graph in place: the first two entries hold the
// vertex and edge counts; whatever remains in `buffer` is the edge list.
function displayGraph(buffer){
    // splice(0,1) returns a one-element array like [['13']]; index into it
    // explicitly and pass a radix instead of relying on parseInt's
    // implicit array-to-string coercion.
    var vertexes_number = parseInt(buffer.splice(0,1)[0][0], 10);
    var edges_number = parseInt(buffer.splice(0,1)[0][0], 10);
    var edges = buffer;

    //doYourThing(vertexes_number, edges_number, edges);
}
toriningen
  • 7,196
  • 3
  • 46
  • 68
Dave Pile
  • 5,559
  • 3
  • 34
  • 49