3

I'm hitting the limit of my understanding here. I have a large text file that I need to split into chunks of 200 lines each (I'm using mime.types for practice). I'm using readline, but it seems to ignore my pause / resume statements. What am I missing?

// Splits a given file into smaller subfiles by line number.
// Output files are named <infileName>.<fileCount>, starting at suffix 1;
// the script rolls over to a new output file after every 200 lines.
var infileName = 'mime.types';
// Numeric suffix of the output file currently being written.
var fileCount = 1;
// Number of lines handed to write() for the current output file.
var count = 0;
var fs = require('fs');
var outfileName = infileName + '.' + fileCount;
var inStream = fs.createReadStream(infileName);
// The first output file is opened up front, before any line has been read.
var outStream = fs.createWriteStream(outfileName);
var lineReader = require('readline').createInterface({
  input: inStream
});

// Called once per input line.
lineReader.on('line', function(line) {
  count++;
  // pause() and the resume() at the bottom both run synchronously inside this
  // same handler invocation, so the reader is resumed before the handler
  // returns.
  // NOTE(review): whether pause() holds back lines that were already read
  // from the input is not visible here — confirm against the readline docs.
  lineReader.pause();
  // The return value of write() is ignored and no callback is passed, so the
  // code below runs without waiting for the write to finish.
  outStream.write(line + '\n');
  if (count >= 200) {
    fileCount++;
    // Logs the name of the file just completed (outfileName is reassigned
    // only after this line).
    console.log('file ', outfileName, count);
    // NOTE(review): close() is called immediately after write(); confirm
    // whether queued data is flushed first — end() is the usual way to
    // finish a writable stream.
    outStream.close();
    outfileName = infileName + '.' + fileCount;
    outStream = fs.createWriteStream(outfileName);
    count = 0;
  }
  lineReader.resume();
});

// Called when the readline interface closes.
lineReader.on('close', function() {
  // Only report the last file if it received at least one line.
  if (count > 0) {
    console.log('Final close:', outfileName, count);
  }
  inStream.close();
  // Closes whichever output stream is current, even if nothing was written
  // to it (count === 0).
  outStream.close();
  console.log('Done');
});

I also tried to put the change of stream into the callback of the write operation:

// Variant of the 'line' handler: the rollover logic and resume() are moved
// into the callback passed to write(), so they run only once that callback
// is invoked rather than synchronously in the handler.
lineReader.on('line', function(line) {
  // count is incremented on every 'line' event, but is only checked and
  // reset inside the write callback below.
  count++;
  lineReader.pause();
  outStream.write(line + '\n', function() {
    // NOTE(review): if further 'line' events are delivered before this
    // callback runs, count can be well past 200 by the time it is checked —
    // confirm against the readline docs for pause().
    if (count >= 200) {
      fileCount++;
      console.log('file ', outfileName, count);
      outStream.close();
      outfileName = infileName + '.' + fileCount;
      // Lines arriving after this point are written to the new stream.
      outStream = fs.createWriteStream(outfileName);
      count = 0;
    }
    lineReader.resume();
  });
});

The result is the same in both cases:
Running `node split` prints `file  mime.types.1 1588`, followed by `Done`.

Ideally I want a solution that doesn't rely on extra npm packages.

stwissel
  • 20,110
  • 6
  • 54
  • 101
  • Which readline are you using ? core or https://www.npmjs.com/package/readline ? – Ludovic C Dec 23 '15 at 04:57
  • Core only. Wonder if that's solvable using core – stwissel Dec 23 '15 at 04:59
  • How many times is the line event dispatched? Can you set a breakpoint? – Ludovic C Dec 23 '15 at 05:02
  • 1588 times :-). The irony... I commented out the .write call and it then created the expected number of files - empty, of course. You can try it. You will surely have the file mime.types somewhere on disk (I just used it for testing since it is nice, long and text only) – stwissel Dec 23 '15 at 05:04
  • 1
    If I remove the pause() and resume(), it works as expected... `file mime.types.1 200 file mime.types.2 200 file mime.types.3 200 file mime.types.4 200 file mime.types.5 200 file mime.types.6 200 file mime.types.7 200 Final close: mime.types.8 188` – Ludovic C Dec 23 '15 at 05:07
  • But the files contain a single line... hmm – Ludovic C Dec 23 '15 at 05:09
  • You can use this core module, https://nodejs.org/api/readline.html – BlackMamba Dec 23 '15 at 05:09
  • @BlackMamba - that's what I'm using. Seems not to react to pause() / resume() – stwissel Dec 23 '15 at 05:13
  • @Ludo: yep - that's the problem – stwissel Dec 23 '15 at 05:13

1 Answer

5

I got rid of the pause / resume calls. They do not seem to be supported by the readline stream. All I did was provide a new writable stream every 200 lines, and finish each writable stream with `end()` instead of `close()`.

// Splits a given file into smaller subfiles by line number.
//
// Output files are named <infileName>.<n> (n = 1, 2, ...) and each holds at
// most `linesPerFile` lines. An output file is opened lazily, only when there
// is a line to put into it, so an input whose line count is an exact multiple
// of `linesPerFile` (or an empty input) does not leave a stray empty file.
//
// NOTE: the return value of write() is ignored (no backpressure handling), so
// data for a very large input may queue up in memory while the disk catches up.
var infileName = 'mime.types';
// Maximum number of lines per output file.
var linesPerFile = 200;
// 1-based suffix of the output file that is open, or that will be opened next.
var fileCount = 1;
// Number of lines written to the currently open output file.
var count = 0;
var fs = require('fs');
// Currently open output stream; null whenever no output file is open.
var outStream = null;
var outfileName = infileName + '.' + fileCount;
var inStream = fs.createReadStream(infileName);

var lineReader = require('readline').createInterface({
    input: inStream
});

// Opens the output file for the current `fileCount` and resets the line counter.
function newWriteStream(){
    outfileName = infileName + '.' + fileCount;
    outStream = fs.createWriteStream(outfileName);
    count = 0;
}

// Called once per input line: appends the line to the current chunk and
// finishes the chunk once it holds `linesPerFile` lines.
lineReader.on('line', function(line) {
    // Open the next chunk only now that there is a line to write into it.
    if (outStream === null) {
        newWriteStream();
    }
    count++;
    outStream.write(line + '\n');
    if (count >= linesPerFile) {
        console.log('file ', outfileName, count);
        // end() (not close()) so that queued writes are flushed before the
        // file is closed.
        outStream.end();
        outStream = null;
        fileCount++;
    }
});

// Called when the input is exhausted: finishes a partially filled last chunk.
lineReader.on('close', function() {
    // outStream is non-null only if the last chunk received at least one line
    // and has not been finished yet.
    if (outStream !== null) {
        console.log('Final close:', outfileName, count);
        outStream.end();
        outStream = null;
    }
    inStream.close();
    console.log('Done');
});
Ludovic C
  • 2,855
  • 20
  • 40