I am trying to write a Lambda function that streams a huge CSV file from an S3 bucket and converts it into multiple small JSON files (say, one JSON file per 2000 rows) written back to S3. I also have some restrictions, such as a memory limit of 256 MB.
I am able to do this by reading the whole file into memory instead of streaming it, as shown below.
But due to the memory constraint, I need to handle this with streams. Is there a way to do the same using streams? (A rough, untested sketch of what I was attempting is included at the end of this post.)
// transformationClass.js
const AWS = require('aws-sdk');
const csv = require('csvtojson');

const s3 = new AWS.S3();
const extension = '.json';
const rowsPerFile = 2000;

class S3CsvToJson {
  static async perform(input, output, headers) {
    // getting the whole s3 object in one go (input is assumed to have
    // bucketName/key, the same shape as output)
    const s3Object = await s3
      .getObject({ Bucket: input.bucketName, Key: input.key })
      .promise();
    const csvString = s3Object.Body.toString('utf8');
    const jsonArray = await csv({ noheader: false }).fromString(csvString);
    const fileNames = await S3CsvToJson.writeToFile(jsonArray, output);
    return { files: fileNames };
  }

  static async writeToFile(jsonArray, output) {
    const fileNames = [];
    if (jsonArray && Array.isArray(jsonArray)) {
      let fileIterator = 1;
      while (jsonArray.length) {
        const outFile = `${output.key}-${fileIterator}${extension}`;
        // writing the next chunk of rows to s3 as one json file
        await s3
          .putObject({
            Bucket: output.bucketName,
            Key: outFile,
            Body: JSON.stringify(jsonArray.splice(0, rowsPerFile)),
          })
          .promise();
        console.log('rows left :', jsonArray.length);
        fileNames.push(outFile);
        fileIterator += 1;
      }
    }
    return fileNames;
  }
}

module.exports = S3CsvToJson;
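For reference, the input and output arguments that perform() receives come straight from the Lambda event, which looks roughly like this (the bucket and key names here are made up, and headerMapping is passed through but not used yet):

// example event (names are made up)
const event = {
  input: { bucketName: 'my-input-bucket', key: 'data/huge-file.csv' },
  output: { bucketName: 'my-output-bucket', key: 'data/huge-file' },
  headerMapping: {}, // forwarded as the headers argument, currently unused
};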
Here is the handler function:
// handler.js
const s3CsvToJson = require('./transformationClass');

module.exports.perform = async (event, context, callback) => {
  context.callbackWaitsForEmptyEventLoop = false;
  const result = await s3CsvToJson.perform(event.input, event.output, event.headerMapping);
  callback(null, result);
  console.log('leaving - ', Date.now());
};
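For context, this is roughly the streaming shape I was attempting but could not get working. It is an untested sketch: streamCsvToJsonFiles, flush, and rowsPerFile are names I made up, and I'm assuming csvtojson's subscribe() hook together with the SDK's getObject(...).createReadStream() is the right way to process one row at a time.

// streamingSketch.js - rough, untested sketch
const AWS = require('aws-sdk');
const csv = require('csvtojson');

const s3 = new AWS.S3();
const rowsPerFile = 2000; // my assumption for the chunk size

async function streamCsvToJsonFiles(input, output) {
  // stream the csv object instead of buffering it all in memory
  const readStream = s3
    .getObject({ Bucket: input.bucketName, Key: input.key })
    .createReadStream();

  const fileNames = [];
  let batch = [];
  let fileIterator = 1;

  // write the rows collected so far as one json file
  const flush = async () => {
    const outFile = `${output.key}-${fileIterator}.json`;
    await s3
      .putObject({
        Bucket: output.bucketName,
        Key: outFile,
        Body: JSON.stringify(batch),
      })
      .promise();
    fileNames.push(outFile);
    fileIterator += 1;
    batch = [];
  };

  // subscribe() gives one parsed row at a time; returning a promise from the
  // handler should make csvtojson wait for it before parsing the next line
  await new Promise((resolve, reject) => {
    csv({ noheader: false })
      .fromStream(readStream)
      .subscribe(
        async (row) => {
          batch.push(row);
          if (batch.length >= rowsPerFile) {
            await flush();
          }
        },
        reject,  // onError
        resolve, // onCompleted
      );
  });

  // flush whatever is left over after the stream ends
  if (batch.length) {
    await flush();
  }

  return { files: fileNames };
}

module.exports = streamCsvToJsonFiles;

My main doubts are whether calling putObject inside the subscribe handler actually holds back the CSV parsing (so memory stays bounded under 256 MB), and whether the leftover rows after the stream ends are handled correctly.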
Thanks in advance!!