11

I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.

The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.

I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.

It's written as an Angular service called from a controller, I think just the service code is pertinent:

(function() {
    'use strict';

    angular
    .module('app.core')
    .factory('blobUploadService',
    [
        '$http', 'stringUtilities',
        blobUploadService
    ]);

/**
 * Service that uploads large files to Azure Blob storage by slicing the
 * HTML5 File into blocks, PUTting each block (comp=block) and finally
 * committing the block list (comp=blocklist).
 *
 * @param {Object} $http AngularJS HTTP service.
 * @param {Object} stringUtilities project helper; only `pad(n, width)` is used
 *        to build fixed-width block ids (Azure requires equal-length ids).
 */
function blobUploadService($http, stringUtilities) {

    var defaultBlockSize = 1024 * 1024; // 1 MB blocks unless overridden
    var stopWatch = {};
    // Module-level reference to the most recent upload's state. Kept only so
    // the exported cancel() keeps working; each upload() call also holds its
    // own private reference (see upload below).
    var state = {};

    /**
     * Builds a fresh, self-contained state object for one upload.
     * @param {Object} config see upload() for the full shape.
     * @returns {Object} per-upload mutable state.
     */
    var initializeState = function(config) {
        var blockSize = defaultBlockSize;
        if (config.blockSize) blockSize = config.blockSize;

        var maxBlockSize = blockSize;
        var file = config.file;
        var fileSize = file.size;

        // A file smaller than one block is uploaded as a single block.
        if (fileSize < blockSize) {
            maxBlockSize = fileSize;
        }

        // Math.floor instead of parseInt: integer division on numbers,
        // without the string round-trip parseInt performs.
        var numberOfBlocks;
        if (fileSize % maxBlockSize === 0) {
            numberOfBlocks = fileSize / maxBlockSize;
        } else {
            numberOfBlocks = Math.floor(fileSize / maxBlockSize) + 1;
        }

        return {
            maxBlockSize: maxBlockSize,
            numberOfBlocks: numberOfBlocks,
            totalBytesRemaining: fileSize,
            currentFilePointer: 0,
            blockIds: [],
            blockIdPrefix: 'block-',
            bytesUploaded: 0,
            submitUri: null,
            file: file,
            baseUrl: config.baseUrl,
            sasToken: config.sasToken,
            fileUrl: config.baseUrl + config.sasToken,
            progress: config.progress,
            complete: config.complete,
            error: config.error,
            cancelled: false
        };
    };

    /* config: {
      baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
      sasToken: // Shared access signature querystring key/value prefixed with ?,
      file: // File object using the HTML5 File API,
      progress: // progress callback function,
      complete: // complete callback function,
      error: // error callback function,
      blockSize: // Use this to override the defaultBlockSize
    } */
    /**
     * Starts an upload and returns a handle with a cancel() method.
     * The handle's cancel() always targets THIS upload, even if another
     * upload() call replaces the module-level `state` in the meantime.
     */
    var upload = function(config) {
        var uploadState = initializeState(config);
        // Keep the legacy module-level reference for the exported cancel().
        state = uploadState;

        var reader = new FileReader();
        reader.onloadend = function(evt) {
            if (evt.target.readyState === FileReader.DONE && !uploadState.cancelled) { // DONE === 2
                var uri = uploadState.fileUrl + '&comp=block&blockid=' +
                    uploadState.blockIds[uploadState.blockIds.length - 1];
                var requestData = new Uint8Array(evt.target.result);
                // Remember the size now so the Uint8Array reference can be
                // dropped as soon as the PUT resolves, letting the block
                // buffer be garbage collected before the next slice is read.
                var blockLength = requestData.length;

                $http.put(uri,
                        requestData,
                        {
                            headers: {
                                'x-ms-blob-type': 'BlockBlob',
                                'Content-Type': uploadState.file.type
                            },
                            // Prevent Angular from JSON-serializing the bytes.
                            transformRequest: []
                        })
                    // .then instead of the deprecated .success/.error
                    // (removed in AngularJS 1.6); callbacks still receive
                    // (data, status, headers, config) as before.
                    .then(function(response) {
                        requestData = null; // release block buffer for GC
                        uploadState.bytesUploaded += blockLength;

                        var percentComplete = ((uploadState.bytesUploaded / uploadState.file.size) * 100
                        ).toFixed(2);
                        if (uploadState.progress) {
                            uploadState.progress(percentComplete, response.data,
                                response.status, response.headers, response.config);
                        }

                        uploadFileInBlocks(reader, uploadState);
                    }, function(response) {
                        if (uploadState.error) {
                            uploadState.error(response.data, response.status,
                                response.headers, response.config);
                        }
                    });
            }
        };

        uploadFileInBlocks(reader, uploadState);

        return {
            cancel: function() {
                uploadState.cancelled = true;
            }
        };
    };

    /**
     * Legacy cancel: stops the most recently started upload and clears all
     * stopwatch handles. Prefer the handle returned by upload().
     * @returns {boolean} always true.
     */
    function cancel() {
        stopWatch = {};
        state.cancelled = true;
        return true;
    }

    /** Starts a named stopwatch; a second start on the same handle is a no-op. */
    function startStopWatch(handle) {
        if (stopWatch[handle] === undefined) {
            stopWatch[handle] = {};
            stopWatch[handle].start = Date.now();
        }
    }

    /**
     * Stops a named stopwatch and returns the elapsed milliseconds.
     * @returns {number|undefined} duration in ms, or undefined when the
     *          handle was never started (previously this threw).
     */
    function stopStopWatch(handle) {
        if (stopWatch[handle] === undefined) return undefined;
        stopWatch[handle].stop = Date.now();
        var duration = stopWatch[handle].stop - stopWatch[handle].start;
        delete stopWatch[handle];
        return duration;
    }

    /**
     * Commits the uploaded blocks as a single blob via Put Block List.
     * Invokes state.complete or state.error when the request settles.
     */
    var commitBlockList = function(state) {
        var uri = state.fileUrl + '&comp=blocklist';

        // Build the XML body with a single join instead of repeated
        // string concatenation.
        var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList><Latest>' +
            state.blockIds.join('</Latest><Latest>') + '</Latest></BlockList>';

        $http.put(uri,
                requestBody,
                {
                    headers: {
                        'x-ms-blob-content-type': state.file.type
                    }
                })
            .then(function(response) {
                if (state.complete) {
                    state.complete(response.data, response.status,
                        response.headers, response.config);
                }
            }, function(response) {
                // called asynchronously if an error occurs
                // or server returns response with an error status.
                if (state.error) {
                    state.error(response.data, response.status,
                        response.headers, response.config);
                }
            });
    };

    /**
     * Reads the next slice of the file (or commits the block list when no
     * bytes remain). Each slice() creates a new Blob; the slice is only
     * referenced by the local variable here and by the pending read, so it
     * becomes collectable once the read completes.
     */
    var uploadFileInBlocks = function(reader, state) {
        if (!state.cancelled) {
            if (state.totalBytesRemaining > 0) {

                var fileContent = state.file.slice(state.currentFilePointer,
                    state.currentFilePointer + state.maxBlockSize);
                // Block ids must all be the same length, hence the fixed-width pad.
                var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);

                state.blockIds.push(btoa(blockId));
                reader.readAsArrayBuffer(fileContent);

                state.currentFilePointer += state.maxBlockSize;
                state.totalBytesRemaining -= state.maxBlockSize;
                // Shrink the final block to exactly the remaining bytes.
                if (state.totalBytesRemaining < state.maxBlockSize) {
                    state.maxBlockSize = state.totalBytesRemaining;
                }
            } else {
                commitBlockList(state);
            }
        }
    };

    return {
        upload: upload,
        cancel: cancel,
        startStopWatch: startStopWatch,
        stopStopWatch: stopStopWatch
    };
}
})();

Are there any ways I can move the scope of objects to help with Chrome GC? I have seen other people mentioning similar issues but understood Chromium had resolved some.

I should say my solution is heavily based on Gaurav Mantri's blog post here:

http://gauravmantri.com/2013/02/16/uploading-large-files-in-windows-azure-blob-storage-using-shared-access-signature-html-and-javascript/#comment-47480

WillH
  • 2,086
  • 6
  • 23
  • 40
  • What is purpose of `var fileContent = state.file.slice`? – guest271314 Jan 06 '17 at 07:39
  • This is the file slice function - I slice the file into chunks so that I upload each chunk and create a blob block. See: https://www.html5rocks.com/en/tutorials/file/dndfiles/#toc-slicing-files – WillH Jan 06 '17 at 09:16
  • `.slice()` creates a new `Blob` which remains in memory for the life of the `document`, unless the `Blob` is `closed`. – guest271314 Jan 06 '17 at 16:52
  • if a 4gb file was indeed held in RAM, why would it only use 2GB? you might try a short setTimeout around `uploadFileInBlocks(reader, state);` to let GC do it's thing while "not busy" – dandavis Jan 10 '17 at 11:46

1 Answers1

4

I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.

You are correct. The new Blobs created by .slice() are being held in memory.

The solution is to call Blob.prototype.close() on the Blob reference when processing Blob or File object is complete.

Note also that the JavaScript at the Question creates a new instance of FileReader each time the upload function is called.

4.3.1. The slice method

The slice() method returns a new Blob object with bytes ranging from the optional start parameter up to but not including the optional end parameter, and with a type attribute that is the value of the optional contentType parameter.

Blob instances exist for the life of document. Though Blob should be garbage collected once removed from Blob URL Store

9.6. Lifetime of Blob URLs

Note: User agents are free to garbage collect resources removed from the Blob URL Store.

Each Blob must have an internal snapshot state, which must be initially set to the state of the underlying storage, if any such underlying storage exists, and must be preserved through StructuredClone. Further normative definition of snapshot state can be found for Files.

4.3.2. The close method

The close() method is said to close a Blob, and must act as follows:

  1. If the readability state of the context object is CLOSED, terminate this algorithm.
  2. Otherwise, set the readability state of the context object to CLOSED.
  3. If the context object has an entry in the Blob URL Store, remove the entry that corresponds to the context object.

If Blob object is passed to URL.createObjectURL(), call URL.revokeObjectURL() on Blob or File object, then call .close().

The revokeObjectURL(url) static method

Revokes the Blob URL provided in the string url by removing the corresponding entry from the Blob URL Store. This method must act as follows: 1. If the url refers to a Blob that has a readability state of CLOSED OR if the value provided for the url argument is not a Blob URL, OR if the value provided for the url argument does not have an entry in the Blob URL Store, this method call does nothing. User agents may display a message on the error console. 2. Otherwise, user agents must remove the entry from the Blob URL Store for url.

You can view the result of these calls by opening

chrome://blob-internals 

reviewing details of before and after calls which create Blob and close Blob.

For example, from

xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain
Type: data
Length: 3

to

xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx
Refcount: 1
Content Type: text/plain

following call to .close(). Similarly from

blob:http://example.com/c2823f75-de26-46f9-a4e5-95f57b8230bd
Uuid: 29e430a6-f093-40c2-bc70-2b6838a713bc

An alternative approach could be to send file as an ArrayBuffer or chunks of array buffers. Then re-assemble the file at server.

Alternatively, you can call the FileReader constructor, FileReader.prototype.readAsArrayBuffer(), and the load event of FileReader only once each.

At load event of FileReader pass ArrayBuffer to Uint8Array, use ReadableStream, TypedArray.prototype.subarray(), .getReader(), .read() to get N chunks of ArrayBuffer as a TypedArray at pull from Uint8Array. When N chunks equaling .byteLength of ArrayBuffer have been processed, pass array of Uint8Arrays to Blob constructor to recombine file parts into single file at browser; then send Blob to server.

<!-- Demo: read a file ONCE with FileReader.readAsArrayBuffer, then split the
     resulting ArrayBuffer into 1 MB Uint8Array views via a ReadableStream,
     and finally recombine the views into a single Blob. Avoids calling
     File.slice() repeatedly (each slice creates a new Blob held in memory). -->
<!DOCTYPE html>
<html>

<head>
</head>

<body>
  <input id="file" type="file">
  <br>
  <progress value="0"></progress>
  <br>
  <output for="file"><img alt="preview"></output>
  <script type="text/javascript">
    // DOM references, a single shared FileReader, an error logger,
    // and the chunk size (1 MB) used when splitting the buffer.
    const [input, output, img, progress, fr, handleError, CHUNK] = [
      document.querySelector("input[type='file']")
      , document.querySelector("output[for='file']")
      , document.querySelector("output img")
      , document.querySelector("progress")
      , new FileReader
      , (err) => console.log(err)
      , 1024 * 1024
    ];

    // Custom "progress" event carries the new value plus a `promise`
    // callback; calling it resolves the Promise created in processData,
    // gating the next chunk read on the UI update.
    progress.addEventListener("progress", e => {
      progress.value = e.detail.value;
      e.detail.promise();
    });

    // chunks: collected Uint8Array views; NEXT: next read offset;
    // CURR: bytes processed so far; url/blob: previous result (if any).
    let [chunks, NEXT, CURR, url, blob] = [Array(), 0, 0];

    input.onchange = () => {
      // Reset all counters and drop state from any previous file.
      NEXT = CURR = progress.value = progress.max = chunks.length = 0;
      if (url) {
        URL.revokeObjectURL(url);
        // Blob.close() is non-standard; feature-detect before calling.
        if (blob.hasOwnProperty("close")) {
          blob.close();
        }
      }

      if (input.files.length) {
        console.log(input.files[0]);
        progress.max = input.files[0].size;
        progress.step = progress.max / CHUNK;
        // The single readAsArrayBuffer call for the whole file.
        fr.readAsArrayBuffer(input.files[0]);
      }

    }

    fr.onload = () => {
      const VIEW = new Uint8Array(fr.result);
      const LEN = VIEW.byteLength;
      const {type, name:filename} = input.files[0];
      // Stream that enqueues CHUNK-sized subarray views of the buffer.
      // subarray() creates views, not copies, so no extra allocation.
      const stream = new ReadableStream({
          pull(controller) {
            if (NEXT < LEN) {
              controller
              .enqueue(VIEW.subarray(NEXT, !NEXT ? CHUNK : CHUNK + NEXT));
               NEXT += CHUNK;
            } else {
              controller.close();
            }
          },
          cancel(reason) {
            console.log(reason);
            throw new Error(reason);
          }
      });

      // processData recursively reads chunks; each iteration waits for the
      // progress event handler to call `resolve` before reading the next.
      const [reader, processData] = [
        stream.getReader()
        , ({value, done}) => {
            if (done) {
              // Stream exhausted: hand the collected chunks onward.
              return reader.closed.then(() => chunks);
            }
            chunks.push(value);
            return new Promise(resolve => {
              progress.dispatchEvent(
                new CustomEvent("progress", {
                  detail:{
                    value:CURR += value.byteLength,
                    promise:resolve
                  }
                })
              );                
            })
            .then(() => reader.read().then(data => processData(data)))
            .catch(e => reader.cancel(e))
        }
      ];

      reader.read()
      .then(data => processData(data))
      .then(data => {
        // Recombine the Uint8Array chunks into one Blob of the original type.
        blob = new Blob(data, {type});
        console.log("complete", data, blob);
        if (/image/.test(type)) {
          // Preview images via an object URL; revoked on the next file change.
          url = URL.createObjectURL(blob);
          img.onload = () => {
            img.title = filename;
            input.value = "";
          }
          img.src = url;
        } else {
          input.value = "";
        }             
      })
      .catch(e => handleError(e))

    }
  </script>

</body>

</html>

plnkr http://plnkr.co/edit/AEZ7iQce4QaJOKut71jk?p=preview


You can also utilize fetch()

fetch(new Request("/path/to/server/", {method:"PUT", body:blob}))

To transmit body for a request request, run these steps:

  1. Let body be request’s body.
  2. If body is null, then queue a fetch task on request to process request end-of-body for request and abort these steps.

  3. Let read be the result of reading a chunk from body’s stream.

    • When read is fulfilled with an object whose done property is false and whose value property is a Uint8Array object, run these substeps:

      1. Let bytes be the byte sequence represented by the Uint8Array object.
      2. Transmit bytes.

      3. Increase body’s transmitted bytes by bytes’s length.

      4. Run the above step again.

    • When read is fulfilled with an object whose done property is true, queue a fetch task on request to process request end-of-body for request.

    • When read is fulfilled with a value that matches with neither of the above patterns, or read is rejected, terminate the ongoing fetch with reason fatal.

See also

Community
  • 1
  • 1
guest271314
  • 1
  • 15
  • 104
  • 177
  • Thank you for that, I had not spotted that in the documents. However I am having trouble getting this to work, I changed my uploadFileInBlocks function to be: var fileContent = state.file.slice(state.currentFilePointer, state.currentFilePointer + state.maxBlockSize); var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6); state.blockIds.push(btoa(blockId)); reader.readAsArrayBuffer(fileContent); fileContent.prototype.close(); but got errors: TypeError: Cannot read property 'close' of undefined at uploadFileInBlocks – WillH Jan 09 '17 at 20:16
  • https://developer.mozilla.org/en/docs/Web/API/Blob#Browser_compatibility Seems it's not supported by any browsers? – WillH Jan 09 '17 at 20:20
  • Apparently not. Tried approach at Answer at chromium. Are multiple `POST` requests being made to server? – guest271314 Jan 09 '17 at 20:21
  • If multiple `POST` requests are being made to server, you can alternatively use `FileReader.readAsArrayBuffer`, `Uint8Array`, `ReadableStream` to send chunks of file to server. – guest271314 Jan 09 '17 at 20:33
  • 1
    OP is not using BlobURLs, so i don't see what all the talk about their lifetime is about... – dandavis Jan 10 '17 at 11:36
  • @dandavis _"OP is not using BlobURLs, so i don't see what all the talk about their lifetime is about"_ Not sure what you mean by "all the talk"? _"If Blob object is passed to URL.createObjectURL()"_ Note "If". Have not tried `angularjs`, not entirely certain about everything occurring at `javascript` at Question, though appears `.slice()` is called multiple times which creates a new `Blob`, possibly `.readAsArrayBuffer(fileContent)` as well. The emphasis is `Blob` lifetime, not `Blob URL`; included mention of `Blob URL` for completeness. That portion of Answer can be ignored if not applicable. – guest271314 Jan 10 '17 at 17:09
  • @dandavis Not sure what you mean? `chrome://blob-internals`, at chromium, chrome lists `Blob`s at `document` which are not closed. What is "vaporware"? What OP has to work with is that `.slice()` creates a new `Blob` that is stored in "memory". It is perhaps beyond the scope of this Question to delve into exactly where and how `Blob`s are stored at user filesystem, though have asked the Question [Where is Blob binary data stored?](http://stackoverflow.com/q/38239361/), and eventually found the files. Germane in that multiple `.slice()` and `.readAsArraBuffer()` calls can be omitted. – guest271314 Jan 10 '17 at 18:05
  • oh snap, you're right about blob-internals, been a while, sorry about that! by vaporware I mean that `new Blob([]).close()` doesn't work in chrome or anywhere else i tried, at least not yet ;) – dandavis Jan 10 '17 at 18:08
  • @dandavis _"I mean that `new Blob([]).close()` doesn't work in chrome or anywhere else i tried"_ What do you mean by "doesn't work"? Which version of chrome did you try at? 1) Open `chrome://blob-internals`; 2) at `console` or an `html` `document`, `var blob = new Blob([1,2,3])`; 3) reload `chrome://blob-internals` to view reference to `Blob`; 4) call `blob.close()`; 5) reload `chrome://blob-internals`; 6) Note the changes to `Blob` reference listed at the page. – guest271314 Jan 10 '17 at 18:26
  • i mean that if i paste that into devtools i get `Uncaught TypeError: blob.close is not a function` – dandavis Jan 10 '17 at 18:28
  • @dandavis Which version of chrome did you try at? – guest271314 Jan 10 '17 at 18:28
  • `Version 55.0.2883.87 m (64-bit)`, which it claims is up to date... Is close() a few weeks away? (yay) – dandavis Jan 10 '17 at 18:29
  • @dandavis Interesting. Do not usually use chrome, here. The above procedure returns results described at chromium 52. Try launching with each of and both of these flags set `--enable-experimental-web-platform-features`, `--javascript-harmony `. – guest271314 Jan 10 '17 at 18:31
  • i already have those chrome-flag'd, is that the same? man, i can never even find chrome.... – dandavis Jan 10 '17 at 18:31
  • @dandavis _"i already have those chrome-flag'd"_ What do you mean? The instance of chrome which you tried at has those flags set? Open `chrome://version` review "Command Line", what is present between `--flag-switches-begin` and `--flag-switches-end` ? _"man, i can never even find chrome...."_? – guest271314 Jan 10 '17 at 18:33
  • @dandavis What is the executable line at desktop launcher for chrome? – guest271314 Jan 10 '17 at 18:39
  • ok, `\Chrome\Application\chrome.exe" --flag-switches-begin --no-pings --enable-devtools-experiments --javascript-harmony ... ` Must be the experimental web platform features, i sometimes turn that off to test prod ;) – dandavis Jan 10 '17 at 18:40
  • @dandavis With the flag enabled, is the reference to the `Blob` data removed at `chrome://blob-internals` when the steps at http://stackoverflow.com/questions/41440235/chrome-memory-issue-file-api-angularjs/41510379?noredirect=1#comment70356290_41510379 are followed? – guest271314 Jan 10 '17 at 18:45
  • i bet they would be, but i need to have to browser open for the next 73 mins at least, so i'll have to get back to you... – dandavis Jan 10 '17 at 18:48
  • 1
    working remotely, in a meeting, shouldn't even be here but still waiting on the boss. – dandavis Jan 10 '17 at 18:50