2

How do you upload a 500 MB file and get an MD5 hash with CryptoJS?

Here is my code:

// When a file is picked, read the whole file in a single pass as a binary
// string and store its hex-encoded MD5 digest on window.md5.
$('#upload-file').on('change', function () {
    const input = this;
    const reader = new FileReader();

    reader.addEventListener('load', () => {
        // Latin1 maps each character of the binary string to exactly one byte.
        const words = CryptoJS.enc.Latin1.parse(reader.result);
        const digest = CryptoJS.MD5(words);
        window.md5 = digest.toString(CryptoJS.enc.Hex);
    });

    reader.readAsBinaryString(input.files[0]);
});

If the file is under 200 MB, it works. For anything bigger, this.result is an empty string ("").

I've tried:

filereader api on big files

javascript FileReader - parsing long file in chunks

and almost got this to work, but the console is complaining about .join("")

http://dojo4.com/blog/processing-huge-files-with-an-html5-file-input

Cœur
  • 37,241
  • 25
  • 195
  • 267
Gundam Meister
  • 1,425
  • 2
  • 19
  • 29

3 Answers3

15

CryptoJS has a progressive API for hash digests. The rest is taken from alediaferia's answer with slight modifications.

/**
 * Click handler: hashes the first file selected in the "my-file-input"
 * element, logging progress updates and the final digest to the console
 * (or the failure reason to the error console).
 */
function process() {
  const input = document.getElementById("my-file-input");
  const selectedFile = input.files[0];

  const reportProgress = function (fraction) {
    console.log("Progress: " + fraction);
  };
  const reportDigest = function (digest) {
    console.log(digest);
  };
  const reportFailure = function (reason) {
    console.error(reason);
  };

  getMD5(selectedFile, reportProgress).then(reportDigest, reportFailure);
}

/**
 * Reads a File/Blob sequentially in slices and hands each slice to a callback.
 *
 * Only one slice is held in memory at a time, so arbitrarily large files can
 * be processed. Slices are read with FileReader.readAsBinaryString, i.e. each
 * chunk is a string with one character per byte.
 *
 * @param {Blob} file - The file (or blob) to read.
 * @param {function(string, number, number): void} chunkCallback - Called for
 *   every chunk as (chunk, bytesReadSoFar, totalBytes). If it throws, reading
 *   stops and the thrown value is passed to endCallback.
 * @param {function(*): void} endCallback - Called exactly once when reading
 *   stops: with null on success, or with the error on failure.
 * @param {number} [chunkSize=4194304] - Slice size in bytes (default 4 MB).
 */
function readChunked(file, chunkCallback, endCallback, chunkSize = 4 * 1024 * 1024) {
  const fileSize = file.size;
  const reader = new FileReader();
  let offset = 0;

  function readNext() {
    reader.readAsBinaryString(file.slice(offset, offset + chunkSize));
  }

  reader.onload = function () {
    if (reader.error) {
      endCallback(reader.error);
      return;
    }

    const chunk = reader.result;

    // A read that yields nothing before the end of the file would never
    // advance the offset; fail instead of re-reading the same slice forever.
    if (chunk.length === 0 && offset < fileSize) {
      endCallback(new Error("Read returned no data at offset " + offset + " of " + fileSize));
      return;
    }

    offset += chunk.length;

    try {
      chunkCallback(chunk, offset, fileSize);
    } catch (err) {
      // Route consumer failures to endCallback so the caller is always told
      // that reading has stopped.
      endCallback(err || new Error("Chunk callback failed"));
      return;
    }

    if (offset >= fileSize) {
      endCallback(null);
      return;
    }
    readNext();
  };

  reader.onerror = function () {
    // The handler argument is a ProgressEvent; the actual failure reason is
    // exposed on reader.error.
    endCallback(reader.error || new Error("Failed to read file"));
  };

  readNext();
}

/**
 * Computes the MD5 digest of a File/Blob incrementally, chunk by chunk,
 * using CryptoJS's progressive hashing API.
 *
 * @param {Blob} blob - The file (or blob) to hash.
 * @param {function(number): void} [cbProgress] - Optional; called after each
 *   chunk with the fraction of the file processed so far (0..1).
 * @returns {Promise<string>} Resolves with the hex-encoded MD5 digest; rejects
 *   if reading, hashing or the progress callback fails.
 */
function getMD5(blob, cbProgress) {
  return new Promise((resolve, reject) => {
    const md5 = CryptoJS.algo.MD5.create();

    readChunked(blob, (chunk, offs, total) => {
      try {
        // Latin1 maps every character of the binary string to one byte;
        // passing the raw string would make CryptoJS encode it as UTF-8.
        md5.update(CryptoJS.enc.Latin1.parse(chunk));
        if (cbProgress) {
          // An empty file has total === 0; report it as complete, not NaN.
          cbProgress(total > 0 ? offs / total : 1);
        }
      } catch (err) {
        // Settle the promise, then rethrow so the reader stops as well.
        reject(err);
        throw err;
      }
    }, err => {
      if (err) {
        reject(err);
        return;
      }
      try {
        resolve(md5.finalize().toString(CryptoJS.enc.Hex));
      } catch (finalizeErr) {
        reject(finalizeErr);
      }
    });
  });
}
<!-- CryptoJS 3.1.2 components loaded from cdnjs: core.js and md5.js. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/core.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/3.1.2/components/md5.js"></script>
<!-- File picker looked up by id "my-file-input", and the button whose click calls process(). -->
<input id="my-file-input" type="file">
<button onclick="process()">Process</button>
Community
  • 1
  • 1
Tamas Hegedus
  • 28,755
  • 12
  • 63
  • 97
  • Thanks Tamas. I tried a simple.txt file, it works. However, when I tried a .zip of the same file, the hash is not correct. – Gundam Meister Aug 24 '16 at 00:22
  • There is something with binary junk, I think chrome tries to decode it as utf8 – Tamas Hegedus Aug 24 '16 at 00:24
  • Found it, cryptojs converts strings to WordArrays using utf8 encoding by default. Had to convert it first. – Tamas Hegedus Aug 24 '16 at 00:31
  • IT WORKS! THANK YOU TAMAS! – Gundam Meister Aug 24 '16 at 00:37
  • @TamasHegedus what part of the snippet did you change to get it to work with zips? – Arthur May 30 '20 at 00:17
  • 1
    @Arthur it was the `md5.update(CryptoJS.enc.Latin1.parse(chunk))` part, I added it to the snippet – Tamas Hegedus May 30 '20 at 10:57
  • @TamasHegedus thank you :), I was comparing the hash results to onlinemd5.com and it turns out onlinemd5 was incorrect, not your code – Arthur May 30 '20 at 22:21
  • We need an implementation of this that does not use readAsBinaryString() which is not supported in MSIE – FirstVertex Jun 03 '20 at 14:12
  • 1
    @HDog with some modification and some coding you could use [`readAsArrayBuffer`](https://developer.mozilla.org/en-US/docs/Web/API/FileReader/readAsArrayBuffer) which is supported by MSIE. Also, tell your clients that MSIE is dead and charge at least double the price if they insist on supporting it. – Tamas Hegedus Jun 04 '20 at 08:39
  • Before this turns into "clobber the Dogger" - I hate supporting MSIE as much as the rest of you. I happen to work in a large corporation and we have fought this fight a long time but still my paycheck depends on following the chain of command so I hold my nose and do the best I can. – FirstVertex Jul 15 '20 at 18:07
1

You don't need to read the whole file at once and feed it all in one go to CryptoJS routines.

You can create the hasher object, and feed chunks as you read them, and then get the final result.

Sample taken from the CryptoJS documentation

// Progressive hashing: create the hasher once, feed it the message piece by
// piece, then finalize to obtain the digest.
var sha256 = CryptoJS.algo.SHA256.create();
["Message Part 1", "Message Part 2", "Message Part 3"].forEach(function (part) {
  sha256.update(part);
});
var hash = sha256.finalize();
jcaron
  • 17,302
  • 6
  • 32
  • 46
0

Modified @Tamas's answer to compute a file hash; added async/await and some checks. Cheers!

/**
 * Computes a hash of a File/Blob incrementally, reading it in slices so that
 * large files never have to be held in memory at once.
 *
 * @param {Blob} file - The file (or blob) to hash.
 * @param {Object} [options]
 * @param {Object} [options.CryptoJS=null] - The crypto-js module (required).
 * @param {function(number): void} [options.cbProgress=null] - Called after
 *   each chunk with the fraction of the file processed so far (0..1).
 * @param {string} [options.algo="SHA256"] - Key of the hasher in CryptoJS.algo.
 * @param {string} [options.encoding="Base64url"] - Output encoding: "Base64",
 *   "Base64url" or "Hex". It must also exist in the supplied module's
 *   CryptoJS.enc.
 * @param {number} [options.chunkSize=1048576] - Slice size in bytes.
 * @returns {Promise<string>} Resolves with the encoded digest. Validation
 *   failures reject with a string message; read, hashing or progress-callback
 *   failures reject with the underlying error.
 */
export default async function getFileHash(file,
  {
    CryptoJS = null,
    cbProgress = null,
    algo = "SHA256",
    encoding = "Base64url",
    chunkSize = 1 * 1024 * 1024
  } = {}) {

  if (!CryptoJS) {
    return Promise.reject("crypto-js module not provided")
  }
  if (!CryptoJS.algo || !Object.prototype.hasOwnProperty.call(CryptoJS.algo, algo)) {
    return Promise.reject("Given hash algo not supported")
  }
  if (!["Base64", "Base64url", "Hex"].includes(encoding)) {
    return Promise.reject(
      `Given hash encoding not supported. Supported encodings are "Base64", "Base64url", "Hex"`
    )
  }

  // The encoder must exist in the module that was passed in. Otherwise
  // toString(undefined) would silently fall back to the default (hex) output.
  const encoder = CryptoJS.enc && CryptoJS.enc[encoding];
  if (!encoder) {
    return Promise.reject(
      `Hash encoding "${encoding}" is not available in the provided crypto-js module`
    )
  }

  return new Promise((resolve, reject) => {
    const hasher = CryptoJS.algo[algo].create();
    const fileSize = file.size;
    const reader = new FileReader();
    let offset = 0;

    function readNext() {
      reader.readAsBinaryString(file.slice(offset, offset + chunkSize));
    }

    reader.onload = function () {
      if (reader.error) {
        reject(reader.error);
        return;
      }

      // Anything thrown in here would otherwise escape the event handler and
      // leave the promise pending forever.
      try {
        const chunk = reader.result;

        // A read that yields nothing before the end of the file (for example
        // chunkSize of 0) would never advance the offset.
        if (chunk.length === 0 && offset < fileSize) {
          throw new Error(`Read returned no data at offset ${offset} of ${fileSize}`);
        }

        // Latin1 maps every character of the binary string to one byte.
        hasher.update(CryptoJS.enc.Latin1.parse(chunk));
        offset += chunk.length;

        if (cbProgress) {
          // An empty file has fileSize === 0; report it as complete, not NaN.
          cbProgress(fileSize > 0 ? offset / fileSize : 1);
        }

        if (offset >= fileSize) {
          resolve(hasher.finalize().toString(encoder));
          return;
        }
        readNext();
      } catch (err) {
        reject(err);
      }
    };

    reader.onerror = function () {
      // The handler argument is a ProgressEvent; the actual failure reason is
      // exposed on reader.error.
      reject(reader.error || new Error("Failed to read file"));
    };

    readNext();
  });
}
Pankaj
  • 37
  • 7