1

Users upload files into my Express app. I need to calculate the hash of the uploaded file and then write the file to disk, using the calculated hash as the filename. I am trying to do it with the following code:

function storeFileStream(file, next) {
    // Hash the upload first, then stream it to disk under the hash name.
    // BUG (the subject of this question): createFileHash pipes `file`
    // into the hash, which fully consumes the stream; by the time the
    // callback below runs there is no data left to pipe into the write
    // stream, so the file written to disk ends up empty (size 0).
    createFileHash(file, function(err, hash) {
        if (err) {
            return next(err);
        }

        // Destination path is derived from the content hash.
        var fileName = path.join(config.storagePath, hash),
            stream = fs.createWriteStream(fileName);

        stream.on('error', function(err) {
            return next(err);
        });
        stream.on('finish', function() {
            return next();
        });

        // `file` has already been drained by createFileHash above,
        // so this pipe produces no data.
        file.pipe(stream);
    });
}

function createFileHash(file, next) {
    // Compute the SHA-1 digest of a readable stream and deliver it as a
    // hex string via next(null, digest). NOTE: this drains `file` —
    // after the callback fires, the stream has no data left to read.
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');

    file.on('error', function(err) {
        return next(err);
    });
    // 'end' fires on the *source*; hash.end() flushes the transform so
    // hash.read() returns the complete hex digest.
    file.on('end', function(data) {
        hash.end();
        return next(null, hash.read());
    });

    file.pipe(hash);
}

The problem is that after I calculate the file hash, the written file's size is 0. What is the best way to solve this task?

Update: Following @poke's suggestion, I tried to duplicate my stream. Now my code is:

function storeFileStream(file, next) {
    // Attempted fix: tee `file` into two PassThrough streams (`pass`),
    // hashing s1 while s2 is kept for the disk write.
    // BUG: s2 has no consumer until the async hash callback attaches
    // `s2.pipe(stream)`. Its internal buffer fills to the highWaterMark
    // and backpressure stalls the shared source — which also stalls s1,
    // so neither 'end' nor 'finish' is ever emitted (the symptom
    // described below).
    var s1 = new pass;
    var s2 = new pass;
    file.pipe(s1);
    file.pipe(s2);

    createFileHash(s1, function(err, hash) {
        if (err) {
            return next(err);
        }

        var fileName = path.join(config.storagePath, hash),
            stream = fs.createWriteStream(fileName);

        stream.on('error', function(err) {
            return next(err);
        });
        stream.on('finish', function() {
            return next();
        });

        // Attached too late: the pipeline upstream is already stalled.
        s2.pipe(stream);
    });
}

function createFileHash(file, next) {
    // Compute the SHA-1 digest of a readable stream and deliver it as a
    // hex string via next(null, digest). Consumes `file` completely.
    var hash = crypto.createHash('sha1');
    hash.setEncoding('hex');

    file.on('error', function(err) {
        return next(err);
    });
    // Waits for the source's 'end', then flushes the hash transform so
    // read() yields the full hex digest.
    file.on('end', function(data) {
        hash.end();
        return next(null, hash.read());
    });

    file.pipe(hash);
}

The problem with this code is that the end and finish events are not emitted. If I comment out file.pipe(s2);, the events are emitted, but then I am back to my original problem.

Dmitriy
  • 683
  • 2
  • 13
  • 26
  • 1
    Since you pipe the file *stream* into the hash function, you have already exhausted the stream completely by the time you have your hash and you want to write the file. You can either write the file to the disk first, and then read it again to calculate the hash, or you need to duplicate your stream up front; see [this question](http://stackoverflow.com/questions/19553837/node-js-piping-the-same-stream-into-multiple-writable-targets). – poke Jun 15 '15 at 09:03
  • @poke Thank you for comment. The first option is not good for me, because I need to calc hash firstly. I try to duplicate stream using PassThrough stream and use the instances of PassThrough instead of `file` parameter. However, 'finish' and 'end' events are not emited now. – Dmitriy Jun 15 '15 at 10:07

3 Answers

0

This code fixes the problem:

// Fan the incoming stream out by hand: every chunk is written to both
// PassThrough branches, and both branches are ended once the source
// ends. Because the chunks are buffered inside the PassThroughs, the
// downstream consumers do not have to be attached yet.
var branches = [new passThrough(), new passThrough()],
    s1 = branches[0],
    s2 = branches[1];

file.on('data', function(chunk) {
    branches.forEach(function(branch) {
        branch.write(chunk);
    });
});
file.on('end', function() {
    branches.forEach(function(branch) {
        branch.end();
    });
});
Dmitriy
  • 683
  • 2
  • 13
  • 26
0

The correct and simple way should be as follows:

The key point is that both duplicated streams must actually be consumed, otherwise backpressure stalls the shared source:

/**
 * Hash an upload stream with SHA-1 and store it on disk under a name
 * derived from the digest.
 *
 * The source is teed into two PassThrough branches: one feeds the hash,
 * the other is drained *immediately* into a temporary file. Draining
 * both branches right away is essential — an unconsumed branch fills
 * its buffer and backpressure stalls the shared source, so the hash
 * never finishes. (Calling .resume() instead, as an earlier revision
 * did, drains the branch by *discarding* every chunk that arrives
 * before the real pipe is attached, producing truncated files.)
 * Once both the digest and the write are complete, the temp file is
 * renamed to its final hash-based name.
 *
 * @param {stream.Readable} file     - incoming upload stream
 * @param {string}          directory - destination directory
 * @param {string}          version  - version tag used in the filename
 * @param {Function}        reject   - called with an Error on failure
 * @param {Function}        resolve  - called with no args on success
 */
function storeFileStream(file, directory, version, reject, resolve) {
  const fileHashSource = new PassThrough();
  const writeSource = new PassThrough();
  file.pipe(fileHashSource);
  file.pipe(writeSource);

  // Start writing under a collision-safe temporary name right away so
  // writeSource is consumed in parallel with hashing.
  const tmpName = path.join(
    directory,
    version + '_' + process.pid + '_' + Date.now() + '.tmp'
  );
  const writeStream = fs.createWriteStream(tmpName);
  writeSource.pipe(writeStream);

  let settled = false;

  // Report the first failure only, and clean up the partial temp file.
  function fail(err) {
    if (settled) return;
    settled = true;
    fs.unlink(tmpName, function() {
      return reject(err);
    });
  }

  let hashResult = null;
  let writeFinished = false;

  // Rename to the final hash-based name once BOTH the digest is known
  // and the temp file is fully flushed to disk.
  function maybeFinish() {
    if (settled || hashResult === null || !writeFinished) return;
    settled = true;
    const fileName = path.join(
      directory,
      version + '_' + hashResult.slice(0, 8) + '.zip'
    );
    fs.rename(tmpName, fileName, function(err) {
      if (err) return reject(err);
      return resolve();
    });
  }

  writeStream.on('error', fail);
  writeStream.on('finish', function() {
    writeFinished = true;
    maybeFinish();
  });

  createFileHash(fileHashSource, function(err, hash) {
    if (err) return fail(err);
    hashResult = hash;
    maybeFinish();
  });
}

/**
 * Compute the SHA-1 digest of a readable stream.
 *
 * Listening on the hash transform's own 'finish' event (rather than
 * the source's 'end') guarantees the digest is fully flushed and
 * readable when the callback fires.
 *
 * @param {stream.Readable} readStream - stream to hash (fully consumed)
 * @param {Function} next - callback(err, hexDigest)
 */
function createFileHash(readStream, next) {
  const sha1 = crypto.createHash('sha1');
  sha1.setEncoding('hex');

  sha1
    .on('error', (err) => next(err))
    .on('finish', () => next(null, sha1.read()));

  readStream.pipe(sha1);
}
e-cloud
  • 4,331
  • 1
  • 23
  • 37
  • I took your answer and tried to combine it with busboy: https://stackoverflow.com/questions/71068235/hashing-a-streaming-file-before-uploading-to-s3. Any thoughts on how to get it to work? – M.Holmes Feb 14 '22 at 21:59
-1

You could use the async module (not tested but should work):

// NOTE(review): this answer does not actually fix the problem (see the
// comment below it) — `file` is piped twice: once into the hash and
// then again, after it has been fully consumed, into the write stream,
// so the saved file is empty.
async.waterfall([
    function(done) {
        var hash = crypto.createHash('sha1');
        hash.setEncoding('hex');

        file.on('error', function(err) {
            done(err);
        });
        file.on('end', function(data) {
            // BUG: passes the function reference `hash.read` instead of
            // its result — should call hash.end() and then hash.read().
            done(null, hash.read);
        });

        file.pipe(hash);

    },
    function(hash, done) {
        var fileName = path.join(config.storagePath, hash),
            stream = fs.createWriteStream(fileName);

        stream.on('error', function(err) {
            done(err);
        });
        stream.on('finish', function() {
            done(null);
        });

        // BUG: `file` was already drained by the hashing step above,
        // so nothing is written here.
        file.pipe(stream);
    }
], function (err) {
    console.log("Everything is done!");
});
michelem
  • 14,430
  • 5
  • 50
  • 66
  • I try your code and it has the same problem: saved file is empty. Actually, I don't know why it may helps me. The problem of reading from already piped stream is still represented here, – Dmitriy Jun 16 '15 at 08:55