
I am trying to compute the SHA-512 of a large file (2.5 GB, and possibly larger). My approach is to build an ArrayBuffer of the whole file and digest it with the crypto.subtle.digest API.

The problem is that I always get

Array buffer allocation failed

Is it my chunk size? Is there a limit on ArrayBuffer size? I am out of ideas. Or is there a better way to compute the hash digest than loading the whole file into a single ArrayBuffer?

    // receives a File object
    function CalculateHash(file) {
      var obj = { File: file };
      var reader = new FileReader();
      var hash = {};
      var chunkSize = 10485760; // 10 MiB per read
      const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
      const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
      var buffer = null;

      reader.onload = async function (evt) {
        if (buffer == null) {
          buffer = evt.currentTarget.result;
        } else {
          // append the freshly read chunk to the accumulated buffer
          var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
          tmp.set(new Uint8Array(buffer), 0);
          tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
          buffer = tmp;
        }
        readNext();
      };

      var readNext = async function () {
        if (chunksQueue.length > 0) {
          const chunkId = chunksQueue.pop();
          const sentSize = chunkId * chunkSize;
          const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
          reader.readAsArrayBuffer(chunk);
        } else {
          // all chunks read: digest the whole buffer in one shot
          var x = await digestMessage(buffer);
          hash.SHA512 = x.toUpperCase();
          buffer = null;
        }
      };

      readNext();
    }
    
    async function digestMessage(buffer) {
      const hashBuffer = await crypto.subtle.digest('SHA-512', buffer);            // hash the data in one shot
      const hashArray = Array.from(new Uint8Array(hashBuffer));                    // convert buffer to byte array
      const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // convert bytes to hex string
      return hashHex;
    }
  • Usually whenever I need to hash a large file, I sample up to 1 MB of it. There is no GOOD reason to hash the entire file when 1 MB of it will be pretty unique. – felixmosh Oct 21 '20 at 15:53
  • The first 1 MB of a large file could be unchanged, but say the last byte of the file has changed: your hash will give a wrong result, no? – Cédric Boivin Oct 21 '20 at 15:57
  • 3
    It seems that `crypto.subtle.digest` doesn't provide a progressive hash. You would likely need another hash implementation if you want compatibility with commandline hashing tools. See my answer with slow hashing based on crypto-js: https://stackoverflow.com/a/28213834/1816580 – Artjom B. Oct 21 '20 at 16:59
  • @ArtjomB. Thank you, I think you solved my problem. I mixed both strategies, because the native digest is much faster than crypto-js: files larger than 1 GB are processed by CryptoJS and smaller ones by the native digest. – Cédric Boivin Oct 21 '20 at 17:26
  • @CédricBoivin: Questions without answers aren't super great for later programmers to find. Please consider answering your own question with how you solved your problem. Thanks. – President James K. Polk Oct 21 '20 at 20:37
  • @PresidentJamesK.Polk Yes, that was my intention; I was testing the approach before posting my final answer :-) – Cédric Boivin Oct 21 '20 at 20:43
  • @ArtjomB. The CryptoJS library is very slow. If you have any suggestion or code update for my code below, it would be great to know. – Cédric Boivin Oct 21 '20 at 21:04
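
To illustrate the limitation Artjom B. points out: the native API is one-shot, while CryptoJS exposes a progressive interface. A minimal sketch (`wholeBuffer` and `chunkWordArray` are placeholder names, not part of the code below):

    // crypto.subtle.digest is one-shot: it takes the complete data as a
    // single BufferSource and returns a Promise<ArrayBuffer>. There is no
    // update()/finalize() pair, so the whole input must fit in one allocation.
    const digest = await crypto.subtle.digest('SHA-512', wholeBuffer);

    // CryptoJS, by contrast, can consume the input chunk by chunk:
    const sha = CryptoJS.algo.SHA512.create();
    sha.update(chunkWordArray);    // may be called once per chunk
    const result = sha.finalize(); // combine and produce the final hash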

1 Answer


Based on @ArtjomB.'s answer, the problem was the lack of a progressive hash in `crypto.subtle`, combined with the browser's limit on ArrayBuffer allocation.

Here is the final worker code. It mixes both approaches, since the native digest is much faster than the CryptoJS library: if the file is larger than 1 GB we hash it progressively with CryptoJS, otherwise we use the native browser digest. Any suggestions are welcome!

    // CryptoJS expects `window` and `document` to exist, so shim them
    // inside the worker scope before importing the library.
    var window = self;
    var document = {};
    self.importScripts("/Crypto.min.js");

    onmessage = async function (args) {
      var obj = args.data;
      var reader = new FileReader();
      var hash = {};
      var chunkSize = 10485760;          // 10 MiB per read
      var largeFileTrigger = 1048576000; // ~1 GB: above this, hash progressively
      const chunksQuantity = Math.ceil(obj.File.size / chunkSize);
      const chunksQueue = new Array(chunksQuantity).fill().map((_, index) => index).reverse();
      var isLargeFile = obj.File.size > largeFileTrigger;
      var buffer = null;
      var progressiveArray = [];

      reader.onload = async function (evt) {
        if (isLargeFile) {
          // large file: keep the chunks separate for progressive hashing
          progressiveArray.push(evt.currentTarget.result);
        } else {
          // small file: concatenate all chunks into one buffer
          if (buffer == null) {
            buffer = evt.currentTarget.result;
          } else {
            var tmp = new Uint8Array(buffer.byteLength + evt.currentTarget.result.byteLength);
            tmp.set(new Uint8Array(buffer), 0);
            tmp.set(new Uint8Array(evt.currentTarget.result), buffer.byteLength);
            buffer = tmp;
          }
        }
        readNext();
      };

      var readNext = async function () {
        if (chunksQueue.length > 0) {
          const chunkId = chunksQueue.pop();
          const sentSize = chunkId * chunkSize;
          const chunk = obj.File.slice(sentSize, sentSize + chunkSize);
          reader.readAsArrayBuffer(chunk);
        } else {
          var hexHash = null;
          if (isLargeFile) {
            // progressive SHA-512 with CryptoJS: update() per chunk, then finalize()
            var sha = CryptoJS.algo.SHA512.create();
            for (var i = 0; i < progressiveArray.length; i++) {
              sha.update(arrayBufferToWordArray(progressiveArray[i]));
            }
            hexHash = sha.finalize().toString();
          } else {
            // one-shot native digest for smaller files
            hexHash = await digestMessage(buffer);
          }

          hash.SHA512 = hexHash.toUpperCase();
          buffer = null;
          progressiveArray = null;
          postMessage({ Hash: hash.SHA512 });
        }
      };

      readNext();
    }
    
    async function digestMessage(buffer) {
      const hashBuffer = await crypto.subtle.digest('SHA-512', buffer);            // hash the data in one shot
      const hashArray = Array.from(new Uint8Array(hashBuffer));                    // convert buffer to byte array
      const hashHex = hashArray.map(b => b.toString(16).padStart(2, '0')).join(''); // convert bytes to hex string
      return hashHex;
    }
    
    // Convert an ArrayBuffer into a CryptoJS WordArray: pack the bytes into
    // big-endian 32-bit words. The second argument (sigBytes) tells CryptoJS
    // how many bytes are significant, so a trailing partial word in the last
    // chunk is handled correctly.
    function arrayBufferToWordArray(ab) {
      var i8a = new Uint8Array(ab);
      var a = [];
      for (var i = 0; i < i8a.length; i += 4) {
        a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
      }
      return CryptoJS.lib.WordArray.create(a, i8a.length);
    }
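
For completeness, here is a minimal sketch of how the main thread could drive this worker. The worker file name and the input element id are assumptions, not part of the code above; the worker only needs a message whose `data` carries a `File` property, and it posts back `{ Hash: ... }`:

    // Hypothetical main-thread side (worker path and input id are assumptions)
    var worker = new Worker("/hash-worker.js");
    worker.onmessage = function (e) {
      console.log("SHA-512:", e.data.Hash);
    };
    document.getElementById("fileInput").addEventListener("change", function (e) {
      // File objects are structured-cloneable, so they can be posted directly
      worker.postMessage({ File: e.target.files[0] });
    });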