I'm using a stream PassThrough to pipe a large file to an S3 bucket, and I believe the response is being sent too soon - before the stream can be initialized, or something like that. Smaller requests seem to work just fine.
Here's an example:
const express = require("express");
const router = express.Router();
const stream = require("stream");
const AWS = require("aws-sdk");

const s3 = new AWS.S3(awsCreds); // awsCreds defined elsewhere

// Returns a PassThrough stream that s3.upload() consumes as the object body.
const uploadFromStream = id => {
  let pass = new stream.PassThrough();
  let params = {
    Bucket: "testbucket",
    Key: `test/${id}.json`,
    Body: pass
  };

  s3.upload(params, (err, data) => {
    if (err) { console.error(err); }
    console.log(data);
  });

  return pass;
};

router.post("/filestream", (req, res) => {
  let id = Math.floor(Math.random() * 100000);
  req.pipe(uploadFromStream(id));
  res.status(200).send(`API: Stream initiated. Check S3 path for file: '${id}.json'`);
});

module.exports = router;
The idea is to avoid blocking the thread so multiple stream requests can be handled at once, which is why I'm sending the 200 back immediately.
If I put a 1 sec delay in there, I can get much larger POSTs through, consistently.
router.post("/filestream", (req, res) => {
  let id = Math.floor(Math.random() * 100000);
  req.pipe(uploadFromStream(id));

  setTimeout(() => {
    res.status(200).send(`API: Stream initiated. Check S3 path for file: '${id}.json'`);
  }, 1000);
});
However, that feels hacky. I'd rather not add an artificial delay that slows every request for every user - streaming was supposed to solve exactly that kind of problem.
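The only alternative I've come up with is to hold the response until the s3.upload callback fires (rough sketch below; I'm assuming that keeping the response open like this doesn't block other requests, since the upload itself runs asynchronously):

router.post("/filestream", (req, res) => {
  let id = Math.floor(Math.random() * 100000);
  let pass = new stream.PassThrough();

  // Same upload as before, but the response waits for S3 to confirm.
  s3.upload(
    { Bucket: "testbucket", Key: `test/${id}.json`, Body: pass },
    (err, data) => {
      if (err) {
        console.error(err);
        return res.status(500).send("API: Upload failed.");
      }
      res.status(200).send(`API: Upload complete. Check S3 path for file: '${id}.json'`);
    }
  );

  req.pipe(pass);
});

That at least confirms the object actually landed in S3, but it also means large uploads hold the HTTP connection open for their whole duration, which seems to defeat the point of responding early.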
Is there a better way to do this? Is throttling necessary? What am I missing?