Read a stream line by line; this should be good for large files piped into stdin. My version:
var n = 0;
function on_line(line, cb)
{
    ////one each line
    console.log(n++, "line ", line);
    return cb();
    ////end of one each line
}

var fs = require('fs');
var readStream = fs.createReadStream('all_titles.txt');
//var readStream = process.stdin;
readStream.pause();
readStream.setEncoding('utf8');

var buffer = [];
readStream.on('data', (chunk) => {
    const newlines = /[\r\n]+/;
    var lines = chunk.split(newlines);
    if (lines.length == 1)
    {
        // no newline in this chunk: accumulate the partial line
        buffer.push(lines[0]);
        return;
    }
    buffer.push(lines[0]); // complete the first line with what was buffered
    var str = buffer.join('');
    buffer.length = 0;
    readStream.pause(); // block input until every line in this chunk is consumed
    on_line(str, () => {
        var i = 0, l = lines.length - 1;
        function while_next()
        {
            i++;
            if (i < l)
            {
                return on_line(lines[i], while_next);
            }
            else
            {
                buffer.push(lines.pop()); // keep the trailing partial line
                lines.length = 0;
                return readStream.resume();
            }
        }
        while_next();
    });
}).on('end', () => {
    var str = buffer.join('');
    buffer.length = 0;
    function after_end()
    {
        ////after end
        console.error('done');
        ////end after end
    }
    if (str.length)
        return on_line(str, after_end); // flush the last unterminated line
    after_end();
});
readStream.resume();
Explanation:
- Setting the encoding to utf8 ensures the stream emits whole UTF-8 characters each time, so a chunk is never cut in the middle of a multi-byte sequence.
- When data is received, the input is paused. This blocks the input until all lines from the chunk are used up, and prevents the buffer from overflowing if the line-processing function is slower than the input.
- A chunk that contains no newline is a partial line: accumulate it across calls and return without processing. Once a chunk does contain a newline, the accumulated buffer is prepended to its first line (see the sketch after this list).
- After all the split lines are consumed, push the last (possibly partial) line into the buffer and resume the paused stream.
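To see the split-and-accumulate rule on its own, here is a minimal sketch that feeds hand-picked chunks through the same logic without any stream; feed_chunk and the sample chunks are hypothetical, purely for illustration:

var buffer = [];
function feed_chunk(chunk) // hypothetical helper, not part of the code above
{
    var lines = chunk.split(/[\r\n]+/);
    if (lines.length == 1)
    {
        buffer.push(lines[0]); // no newline: just a partial line
        return;
    }
    buffer.push(lines[0]); // finish the buffered partial line
    console.log('line:', buffer.join(''));
    buffer.length = 0;
    for (var i = 1; i < lines.length - 1; i++)
        console.log('line:', lines[i]); // complete lines
    buffer.push(lines[lines.length - 1]); // trailing partial line
}
feed_chunk('first li');   // no newline: buffered
feed_chunk('ne\nsecond'); // prints "line: first line", buffers "second"
feed_chunk(' line\n');    // prints "line: second line", buffers ''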
async/await (ES2017) version:
var n = 0;
async function on_line(line)
{
    ////one each line
    console.log(n++, "line ", line);
    ////end of one each line
}

var fs = require('fs');
var readStream = fs.createReadStream('all_titles.txt');
//var readStream = process.stdin;
readStream.pause();
readStream.setEncoding('utf8');

var buffer = [];
readStream.on('data', async (chunk) => {
    const newlines = /[\r\n]+/;
    var lines = chunk.split(newlines);
    if (lines.length == 1)
    {
        // no newline in this chunk: accumulate the partial line
        buffer.push(lines[0]);
        return;
    }
    readStream.pause(); // block input until every line in this chunk is consumed
    buffer.push(lines[0]); // take the first line, completed by what was buffered
    var str = buffer.join('');
    buffer.length = 0; // clear the array, because it was consumed
    await on_line(str);
    for (let i = 1; i < lines.length - 1; i++)
        await on_line(lines[i]);
    buffer.push(lines[lines.length - 1]); // keep the trailing partial line
    lines.length = 0; // optional, clear the array to hint GC
    return readStream.resume();
}).on('end', async () => {
    var str = buffer.join('');
    buffer.length = 0;
    if (str.length)
        await on_line(str); // flush the last unterminated line
});
readStream.resume();
I did not test the async/await code.
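Since the async/await version is untested, one quick way to exercise it without a real file is to replace the fs.createReadStream line with a PassThrough stream and write chunks by hand. A minimal harness sketch, assuming the same 'data' and 'end' handlers from above are attached (the sample chunks are arbitrary, chosen so one line spans a chunk boundary):

var { PassThrough } = require('stream');
var readStream = new PassThrough();
readStream.pause();
readStream.setEncoding('utf8');
// ...attach the 'data' and 'end' handlers from above here...
readStream.resume();
readStream.write('alpha\nbr');   // "alpha" is complete, "br" crosses the boundary
readStream.write('avo\nbeta\n'); // completes "bravo", then "beta"
readStream.end();                // fires 'end', which flushes anything left in the buffer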