Runtime env is Node 14 (AWS Lambda function).
The S3 bucket and Lambda function are in the same region, and I have confirmed that the Lambda function is able to get the object from S3 (i.e. permissions do not seem to be an issue).
The Lambda triggers upon PUT of an object (a very simple CSV file) into the S3 bucket. No errors or exceptions appear in the CloudWatch log stream.
package.json
{
"dependencies": {
"@fast-csv/parse": "4.3.6"
}
}
index.js
const aws = require('aws-sdk');
const s3 = new aws.S3({region: 'us-east-2'});
const fs = require('fs');
const csv = require('@fast-csv/parse');
exports.handler = async (event, context) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const file = await s3.getObject(params).promise();
fs.createReadStream(file.Body).pipe(csv.parse())
.on('error', (error) => console.error(error))
.on('data', (row) => console.log(row))
.on('end', (rowCount) => console.log(`Parsed ${rowCount} rows`));
};
I also tried the following variation, which had the same outcome:
index.js (variant)
const aws = require('aws-sdk');
const s3 = new aws.S3({region: 'us-east-2'});
const fs = require('fs');
const csv = require('fast-csv');
exports.handler = async (event, context) => {
const bucket = event.Records[0].s3.bucket.name;
const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
const params = {
Bucket: bucket,
Key: key,
};
const file = await s3.getObject(params).promise();
const stream = fs.createReadStream(file.Body);
csv.parseStream(stream)
.on('data', (data) => {
console.info('Data: ' + JSON.stringify(data));
})
.on('data-invalid', (data) => {
console.error('Invalid batch row ' + data);
})
.on('end', () => {
console.info('End of Stream');
})
.on('error', (error) => {
let message = "Error in csv stream processing";
console.error(message, ":", error);
}
);
};
Note: I already tried simply doing `await s3.getObject(params).createReadStream()`, but that results in `undefined`, whereas getting the object via `promise()` first does return the object data.
I've been wrestling with this for hours, so any help is appreciated. Thanks!