I have a simple script that processes a 10 GB CSV file. The idea is pretty simple:
- Open the file as a stream.
- Parse CSV rows from it into objects.
- Modify the objects.
- Pipe the output stream into a new file.
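Conceptually it is just a chain of four streams. A stripped-down sketch of what I mean (using the parse/transform/stringify helpers from the csv package; the paths and the transform body here are only placeholders, my real code is below):

import fs from 'node:fs';
import { parse, transform, stringify } from 'csv';

fs.createReadStream('./input-data.csv')                 // 1. open the file as a stream
  .pipe(parse({ columns: true }))                       // 2. parse CSV rows into objects
  .pipe(transform(record => ({ ...record })))           // 3. modify each object here
  .pipe(stringify({ header: true }))                    //    turn objects back into CSV text
  .pipe(fs.createWriteStream('./output.csv'));          // 4. write to a new file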
I wrote the following code, but it causes a memory leak. I have tried a lot of different things, but nothing helps. The leak disappears if I remove the transformer from the pipe chain, so maybe that is what causes it.
I run the code under Node.js. Can you help me find where I am going wrong? Here is the full script:
'use strict';
import fs from 'node:fs';
import {parse, transform, stringify} from 'csv';
import lineByLine from 'n-readlines';
// big input file
const inputFile = './input-data.csv';
// read headers first
const linesReader = new lineByLine(inputFile);
const firstLine = linesReader.next();
linesReader.close();
const headers = firstLine.toString()
  .split(',')
  .map(header => {
    return header
      .replace(/^"/, '')
      .replace(/"$/, '')
      .replace(/\s+/g, '_')
      .replace('(', '_')
      .replace(')', '_')
      .replace('.', '_')
      .replace(/_+$/, '');
  });
// file stream
const fileStream1 = fs.createReadStream(inputFile);
// parser stream
const parserStream1 = parse({delimiter: ',', cast: true, columns: headers, from_line: 1});
// transformer
const transformer = transform(function(record) {
  return Object.assign({}, record, {
    SomeField: 'BlaBlaBla',
  });
});
// stringifier stream
const stringifier = stringify({delimiter: ','});
console.log('Loading data...');
// chain of pipes
fileStream1.on('error', err => { console.log(err); })
  .pipe(parserStream1).on('error', err => { console.log(err); })
  .pipe(transformer).on('error', err => { console.log(err); })
  .pipe(stringifier).on('error', err => { console.log(err); })
  .pipe(fs.createWriteStream('./_data/new-data.csv')).on('error', err => { console.log(err); })
  .on('finish', () => {
    console.log('Loading data finished!');
  });
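For what it's worth, when I say the leak disappears without the transformer, I mean this shortened chain (reusing the same streams defined above, only the transformer dropped) does not show the problem:

// same streams as above, only the transformer is removed from the chain
fileStream1.on('error', err => { console.log(err); })
  .pipe(parserStream1).on('error', err => { console.log(err); })
  .pipe(stringifier).on('error', err => { console.log(err); })
  .pipe(fs.createWriteStream('./_data/new-data.csv')).on('error', err => { console.log(err); })
  .on('finish', () => {
    console.log('Loading data finished!');
  });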