20

I'm trying to read a file line by line, perform some action that has a callback, and resume reading lines only when that action finishes. For example:

// Intended behavior: read one line, wait 2 seconds, then continue with the next line.
var fs = require('fs');
var readline = require('readline');
var stream = require('stream');
var instream = fs.createReadStream('./phrases.txt');
var outstream = new stream; // NOTE(review): instantiates the bare Stream constructor; readline only needs it as a dummy output here
var rl = readline.createInterface(instream, outstream);
rl.on('line', function (line) {
  // NOTE(review): pausing here does not stop 'line' events for data readline
  // has already buffered, which is why all the timeouts end up overlapping.
  rl.pause();
  setTimeout(function () {
    console.log('resuming');
    rl.resume();
  }, 2000);
});

I was under the impression that the example above would read a line, wait for 2 seconds, console.log, and then continue to the next line. What really happens is that it waits 2 seconds once and then prints all of the console.log output in a burst.

kiril
  • 4,914
  • 1
  • 30
  • 40
Or Weinberger
  • 7,332
  • 23
  • 71
  • 116
  • This is because `rl.on` fires very rapidly in succession. It is asynchronous, so one `rl.on` doesn't wait for the previous one to complete. It fires whenever the next line has been read, which should be pretty fast. So if `rl.on` fires one millisecond after another, your time out function merely starts in parallel one millisecond after another. So if you have 10 lines in the file, the total delay is 2.000010s, and not 20s! – Old Geezer Aug 12 '18 at 07:37
  • Hi! Please, can you mark my answer as valid if it works for you? Thanks! :) – Javi Marzán Jan 20 '22 at 19:35

7 Answers

21

Line by Line module helps you reading large text files, line by line, without buffering the files into memory.

You can process the lines asynchronously. This is the example provided:

// Example from the line-by-line module: stream 'big_file.txt' one line at a
// time, pausing the reader while each line is handled asynchronously.
var LineByLineReader = require('line-by-line');
var reader = new LineByLineReader('big_file.txt');

reader.on('error', function (err) {
    // 'err' contains error object
});

reader.on('line', function (line) {
    // Stop further 'line' events while this line is being processed.
    reader.pause();

    // Simulate asynchronous per-line work, then let the reader continue.
    setTimeout(function () {
        reader.resume();
    }, 100);
});

reader.on('end', function () {
    // All lines are read, file is closed now.
});
Javier Ferrero
  • 8,741
  • 8
  • 45
  • 50
  • 1
    This is effectively sequential, because each line will be processed only after the previous line has been processed? – Haoyuan Ge Sep 09 '16 at 04:03
  • This should be the accepted answer. It solves exactly what the question asks. After several hours of trying to figure this out on my own, this is the solution I ended up using. – Levi Roberts Jun 10 '17 at 05:29
  • This is the real answer. Other modules just pretend to be async. You really need something like pause stream. – mauron85 Nov 08 '17 at 07:27
  • This solution uses events, which are hard to scale and have become less popular over the years. Another solution below uses `line-reader`, which uses a callback, for processing, and promises, for wrap up. – dirkjot May 30 '18 at 07:31
13

Solution without installing any external library. You only need the native node.js "readline" module. Just do the following:

import fs from "fs";
import path from "path";
import readline from "readline";
import { fileURLToPath } from "url";

// `__dirname` does not exist in ES modules; derive it from import.meta.url.
// (The original snippet used `path` and `__dirname` without defining either.)
const __dirname = path.dirname(fileURLToPath(import.meta.url));

const readInterface = readline.createInterface({
  input: fs.createReadStream(path.join(__dirname, 'myfile.txt'))
});

// The async iterator pauses the underlying stream between iterations, so each
// line is fully processed before the next one is delivered — exactly what the
// question asks for, with no external dependency.
for await (const line of readInterface) {
  await someAsynchronousOperation();
}

Source (Official documentation): https://nodejs.org/api/readline.html#rlsymbolasynciterator

Javi Marzán
  • 1,121
  • 16
  • 21
5

A very nice line-reader module exists, https://github.com/nickewing/line-reader

simple code:

 var lineReader = require('line-reader');
   lineReader.eachLine('file.txt', function(line, last) {
      // do whatever you want with line...
      console.log(line);
      if(last){
         // or check if it's the last one
      }
   });

also "java-style" interface for more control:

// "java-style" interface: pull lines explicitly instead of via events.
lineReader.open('file.txt', function (reader) {
  if (!reader.hasNextLine()) return;
  reader.nextLine(function (line) {
    console.log(line);
  });
});

Another cool solution:

// Print each line, then hold for 2 seconds before handling the next one.
// NOTE: sleep.sleep() blocks the whole event loop while it waits.
var fs = require('fs');
var readline = require('readline');
var sleep = require('sleep');

var reader = readline.createInterface({
    input: fs.createReadStream('phrases.txt'),
    output: process.stdout,
    terminal: false
});

reader.on('line', function (line) {
    console.log('-------');
    console.log(line);
    sleep.sleep(2);
});
Zeeshan Hassan Memon
  • 8,105
  • 4
  • 43
  • 57
2
// Returns an EventEmitter that emits a 'line' event (Buffer, '\n' stripped)
// for every newline-terminated line of `fileName`, a final 'line' for any
// trailing partial line, then 'end'. Stream errors are re-emitted as 'error'.
//
// NOTE(review): like the built-in readline, 'line' fires as fast as chunks
// arrive — this still does not pause for asynchronous consumers.
function createLineReader(fileName){
    var EventEmitter = require("events").EventEmitter
    var emitter = new EventEmitter()
    var stream = require("fs").createReadStream(fileName)
    var remainder = null; // bytes after the last '\n' of the previous chunk

    stream.on("data", function (data) {
        // Prepend any partial line carried over from the previous chunk.
        // Buffer.concat replaces the deprecated `new Buffer(size)` + copy dance.
        if (remainder != null) {
            data = Buffer.concat([remainder, data])
        }
        var start = 0;
        for (var i = 0; i < data.length; i++) {
            if (data[i] == 10) { // '\n' new line
                emitter.emit("line", data.slice(start, i))
                start = i + 1;
            }
        }
        // Keep the unterminated tail (if any) for the next chunk.
        remainder = start < data.length ? data.slice(start) : null;
    })

    stream.on("end", function () {
        if (remainder != null) emitter.emit("line", remainder)
        emitter.emit("end") // lets callers know when reading has finished
    })

    // Surface read errors instead of letting the stream throw them uncaught.
    stream.on("error", function (err) {
        emitter.emit("error", err)
    })

    return emitter
}


//---------main---------------
// Usage: node script.js <file>
var fileName = process.argv[2]
if (!fileName) {
    // Fail fast with a clear message instead of crashing inside fs later.
    console.error("usage: node script.js <file>")
    process.exit(1)
}

var lineReader = createLineReader(fileName)
lineReader.on("line", function (line) {
    console.log(line.toString())
})
  • You're basically rewrote the `readline` built-in module without solving OP's issue, where your `lineReader`'s line event will fire off without waiting for async events – Cardin Aug 20 '19 at 05:55
2

Here is a simple solution in typescript using line-reader that can run in nodejs 8:

import lineReader from 'line-reader';

/**
 * Read `filename` line by line, awaiting `processLine` for each line before
 * the next one is delivered. Resolves once the last line has been processed;
 * rejects if `processLine` rejects.
 */
function readLines(filename: string, processLine: (line: string) => Promise<void>): Promise<void> {
  return new Promise((resolve, reject) => {
    lineReader.eachLine(filename, (line, last, callback) => {
      if (!callback) throw new Error('panic');
      processLine(line)
        .then(() => {
          if (last) {
            resolve();
          } else {
            callback();
          }
        })
        .catch(reject);
    });
  });
}

// Echo stdin line by line, one awaited iteration at a time.
async function echo(): Promise<void> {
  const printLine = async (line: string) => {
    console.log(line);
  };
  await readLines('/dev/stdin', printLine);
}

echo();

Note that it does not buffer the whole file before executing, therefore it is suitable for processing large text files.

raugfer
  • 1,844
  • 19
  • 19
2

I suggest using stdio for this kind of thing, as the input stream is paused and resumed automatically and you don't need to worry about your system resources. You'll be able to read really huge files with just a few MBs of memory:

This example prints a line every 2 seconds:

$ node myprogram.js < file.txt
import { read } from 'stdio';

// Handler invoked for each input line; `read` delivers the next line only
// after the promise returned here resolves.
const onLine = async (line) => {
  console.log(line);
  await sleep(2000);
};

read(onLine).then(() => console.log('finished'));

Note I'm using an asynchronous sleep to represent any asynchronous task. It is not included in Node.js by default but it would be as follows:

// Promise-based sleep: resolves after `delay` milliseconds.
const sleep = (delay) => new Promise((resolve) => { setTimeout(resolve, delay); });
sgmonda
  • 2,615
  • 1
  • 19
  • 29
-2
// Stream sample.txt through the built-in readline module and log each line.
const fs = require('fs');
const readline = require('readline');

const lineInterface = readline.createInterface({
  input: fs.createReadStream('sample.txt')
});

lineInterface.on('line', (line) => {
  console.log(`Line from file: ${line}`);
});

source: https://nodejs.org/api/readline.html#readline_example_read_file_stream_line_by_line

RoKK
  • 617
  • 8
  • 17
  • 2
    Although your code is cleaner, it is functionally no different than the code in the question. Adding a setTimeout does exactly the same in your code. I too have been looking for a solution to my problem for the past couple hours. There doesn't seem to be any good native (inbuilt nodejs) way to accomplish running tasks before continuing onto the next line. – Levi Roberts Jun 10 '17 at 05:26
  • 2
    This doesn't do what he's asking: `I'm trying to read a file line by line, perform some action that has a callback *and when the function finishes to resume line reading*` – UpTheCreek Nov 22 '18 at 10:53