0

I am trying to allow users to upload files and save them directly in Google Cloud Storage. I use Node.js as the server.

The below code works, but...

/**
 * Express handler: streams the raw request body into a fixed GCS object.
 *
 * NOTE(review): `req` carries the raw multipart/form-data body, so piping it
 * verbatim also stores the multipart envelope (boundary lines + part headers)
 * in the object — which is exactly the extra text described below. The body
 * must be parsed with a multipart parser if only the file contents should be
 * stored.
 *
 * @param {import('http').IncomingMessage} req - incoming upload request
 * @param {import('express').Response} res - Express response
 * @param {Function} next - Express next (unused here)
 */
const uploadFile = async (req, res, next) => {
    const file = bucket.file('sample/folder/file.txt');
    // Pass-through stream relaying request chunks to the GCS write stream.
    const passthroughStream = new stream.PassThrough();

    const writeStream = file.createWriteStream();
    writeStream
        .on('finish', () => {
            // The file upload is complete
            console.log('write-stream ended');
            res.status(200).send({
                // fixed: was `succes`, inconsistent with `success` used elsewhere
                success: true
            });
        })
        .on('error', (err) => {
            // Previously missing: a GCS write failure left the request hanging
            // with no response ever sent to the client.
            console.error('Error writing to GCS', err);
            res.status(500).send({
                success: false,
                message: err.message
            });
        });
    passthroughStream.pipe(writeStream);

    req.on('data', chunk => {
        passthroughStream.write(chunk);
    });
    req.on('end', () => {
        passthroughStream.end();
        console.log('request ended');
    });
};

What I get is this:

------WebKitFormBoundaryzsP9s0Bs6TksaKXo
Content-Disposition: form-data; name="teste.txt"; filename="teste.txt"
Content-Type: text/plain
... rest of the text file...
------WebKitFormBoundaryzsP9s0Bs6TksaKXo--

Not sure if it matters: I created an 8 MB txt file to make sure there would be more than one chunk. This extra text appears only at the beginning and at the end of the stored file.

How do I get rid of it? Alternatively: how can I do it another way?

SharpBCD
  • 547
  • 1
  • 7
  • 25
  • Have you checked [Signed URLs](https://cloud.google.com/storage/docs/access-control/signed-urls)? You can upload files directly from client using them. – Dharmaraj Oct 07 '22 at 15:42
  • Nope. I knew only about downloading with signed url. – SharpBCD Oct 07 '22 at 16:25
  • They can use used to upload files as well. You make an API call to your backend, that'll generate a signed URL, then the client can use it to upload the file directly to GCS. – Dharmaraj Oct 07 '22 at 16:26
  • Could work as a workaround but i rather have control over it. I need to save some stuff in db also after the upload is complete – SharpBCD Oct 07 '22 at 16:41
  • You could use [Cloud Storage Triggers](https://firebase.google.com/docs/functions/gcp-storage-events) for Cloud Functions that'll run after a file is uploaded to run some logic. This way users won't have to wait until the image is uploaded to server and then to GCS. But maybe you can try passing base64 strings to server and then follow [this answer](https://stackoverflow.com/questions/42879012/how-do-i-upload-a-base64-encoded-image-string-directly-to-a-google-cloud-stora) to upload to GCS> – Dharmaraj Oct 07 '22 at 16:44
  • Why all this workaround? What i made works, i just need to get rid of the headers. The txt is just example, users upload movies and large pictures. – SharpBCD Oct 07 '22 at 18:22

1 Answers1

0

The solution was to use a library (formidable in this case) and let them handle the stream.

However, it does NOT work, so don't take this approach.

GCS is considerably slower than the HDD, so when uploading large files it consumes all available memory (RAM or HDD buffer). Linux crashes, or nginx reports that no space is available.

  • The most elegant solution is a signed URL: the client uploads directly to GCS. However, I ran into another problem with that, which I will post as another question here on Stack Overflow.
  • The compromise solution currently in use is to upload the file to the server, upload it from the server to GCS, and then delete it from the server. It takes about twice as long and is not very elegant, but it works.

Here is the full code for streams pairings:

/**
 * Express handler: streams a multipart file upload straight to GCS via
 * formidable, without buffering the file on disk.
 *
 * NOTE(review): assigning formidable.IncomingForm.prototype.onPart mutates the
 * shared prototype, so it affects every IncomingForm instance in the process,
 * not just this request; with concurrent uploads the closure state below
 * (fileName, passthroughStream) would be shared — TODO confirm this is safe.
 */
const uploadFile2 = async (req, res, next) => {
    // Create a pass through stream from a string
    const passthroughStream = new stream.PassThrough();
    
    // will be set in hook. Will be used to log the new uploaded file.
    let fileName;
    // Last stream/write error observed; it is logged but never sent to the client.
    let error = null;
    // override function to prevent disk writing and only to stream upload
    formidable.IncomingForm.prototype.onPart = (part) => {
        if (!part.filename) {
            // let formidable handle all non-file parts
            // NOTE(review): there is no `return` here, so non-file parts also
            // fall through to the data/end handlers below — verify intended.
            form._handlePart(part);
        }
        part.on('data', (data) => {
            // Relay each file chunk into the passthrough stream feeding GCS.
            passthroughStream.write(data, er => {
                if (er) {
                    console.error('Eroare cand am incercat sa salvez fisierul', fileName, er);
                    error = er;
                }
            });
        });
        part.on('end', () => {
            // weird error: "try to write after end". This delay fix it, but it's sure not something elegant.
            setTimeout(() => {
                passthroughStream.end();
                // we'll send the res when the cloud ends the writing
                // NOTE(review): despite the comment above, the 200 is sent here
                // rather than in the GCS 'finish' handler — the upload may
                // still be in flight when the client receives this response.
                console.log('Ended stream for:', fileName);
                res.status(200).send({
                    succes: true
                });
            }, 500);
            
        });
        part.on('error', (err) => {
            console.error('Something went wrong in uploading file:', err);
            // NOTE(review): if the 'end' handler above already sent the 200,
            // this second send would throw ERR_HTTP_HEADERS_SENT.
            res.status(500).send({
                success: false,
                message: err
            });
        });
    };
    
    // 1 GiB max file size; multiple files allowed, original extensions kept.
    const form = new formidable.IncomingForm({
        multiples     : true,
        keepExtensions: true,
        maxFileSize   : 1024 * 1024 * 1024
    });
    
    // NOTE(review): these repeat the constructor options above — presumably a
    // workaround for formidable versions reading the options from different
    // places; confirm which one the installed version actually uses.
    form.multiples = true;
    form.keepExtensions = true;
    form.maxFileSize = 1024 * 1024 * 1024;
    form.options.maxFileSize = 1024 * 1024 * 1024;
    
    
    // Kick off parsing; only the error path responds here — the success path
    // is handled by the part 'end' handler installed via onPart above.
    form.parse(req, async (err, fields, files) => {
        if (err) {
            console.log('Error parsing the files', err);
            return res.status(500).json({
                error       : true,
                success     : false,
                message     : 'There was an error parsing the files',
                errorMessage: err
            });
        }
    });
    
    // Once formidable sees a file part, wire the passthrough stream into a
    // GCS write stream named after the request URL + original filename.
    form.on('fileBegin', (_, file) => {
        // set filename but eliminate the /upload/ part, so 7 chars
        // NOTE(review): substring(8) drops 8 characters, not 7 — verify the
        // route prefix length this is meant to strip.
        fileName = req.originalUrl.substring(8) + file.originalFilename;
        console.debug('begining upload for: ', req.originalUrl);
        const bucketFile = bucket.file(fileName);
        passthroughStream
            .pipe(bucketFile.createWriteStream())
            .on('finish', (error) => {
                // The file upload is complete
                // NOTE(review): 'finish' handlers receive no error argument,
                // so this `error` parameter is always undefined (and shadows
                // the outer `error`); the else branch below looks dead.
                if (!error) {
                    console.log('New file was uploaded:', fileName);
                    
                }
                else {
                    console.error('Error in processing passthroughStream');
                    res.status(500).send({
                        error  : true,
                        success: false,
                        message: error
                    });
                }
            })
            .on('error', er => {
                console.error('Error while trying to write on gcs', fileName, er);
                error = er;
            });
    });
    // Form-level failures (e.g. exceeding maxFileSize) respond with a 500.
    form.on('error', (err) => {
        console.error('Something went wrong in uploading file:', err);
        res.status(500).send({
            error  : true,
            success: false,
            message: err
        });
    });
};
SharpBCD
  • 547
  • 1
  • 7
  • 25