I have a web app that needs to upload large files to Azure BLOB storage. My solution uses HTML5 File API to slice into chunks which are then put as blob blocks, the IDs of the blocks are stored in an array and then the blocks are committed as a blob.
The solution works fine in IE. On 64 bit Chrome I have successfully uploaded 4Gb files but see very heavy memory usage (2Gb+). On 32 bit Chrome the specific chrome process will get to around 500-550Mb and then crash.
I can't see any obvious memory leaks or things I can change to help garbage collection. I store the block IDs in an array so obviously there will be some memory creeep but this shouldn't be massive. It's almost as if the File API is holding the whole file it slices into memory.
It's written as an Angular service called from a controller, I think just the service code is pertinent:
(function() {
'use strict';
angular
.module('app.core')
.factory('blobUploadService',
[
'$http', 'stringUtilities',
blobUploadService
]);
function blobUploadService($http, stringUtilities) {
var defaultBlockSize = 1024 * 1024; // Default to 1024KB
var stopWatch = {};
var state = {};
var initializeState = function(config) {
var blockSize = defaultBlockSize;
if (config.blockSize) blockSize = config.blockSize;
var maxBlockSize = blockSize;
var numberOfBlocks = 1;
var file = config.file;
var fileSize = file.size;
if (fileSize < blockSize) {
maxBlockSize = fileSize;
}
if (fileSize % maxBlockSize === 0) {
numberOfBlocks = fileSize / maxBlockSize;
} else {
numberOfBlocks = parseInt(fileSize / maxBlockSize, 10) + 1;
}
return {
maxBlockSize: maxBlockSize,
numberOfBlocks: numberOfBlocks,
totalBytesRemaining: fileSize,
currentFilePointer: 0,
blockIds: new Array(),
blockIdPrefix: 'block-',
bytesUploaded: 0,
submitUri: null,
file: file,
baseUrl: config.baseUrl,
sasToken: config.sasToken,
fileUrl: config.baseUrl + config.sasToken,
progress: config.progress,
complete: config.complete,
error: config.error,
cancelled: false
};
};
/* config: {
baseUrl: // baseUrl for blob file uri (i.e. http://<accountName>.blob.core.windows.net/<container>/<blobname>),
sasToken: // Shared access signature querystring key/value prefixed with ?,
file: // File object using the HTML5 File API,
progress: // progress callback function,
complete: // complete callback function,
error: // error callback function,
blockSize: // Use this to override the defaultBlockSize
} */
var upload = function(config) {
state = initializeState(config);
var reader = new FileReader();
reader.onloadend = function(evt) {
if (evt.target.readyState === FileReader.DONE && !state.cancelled) { // DONE === 2
var uri = state.fileUrl + '&comp=block&blockid=' + state.blockIds[state.blockIds.length - 1];
var requestData = new Uint8Array(evt.target.result);
$http.put(uri,
requestData,
{
headers: {
'x-ms-blob-type': 'BlockBlob',
'Content-Type': state.file.type
},
transformRequest: []
})
.success(function(data, status, headers, config) {
state.bytesUploaded += requestData.length;
var percentComplete = ((parseFloat(state.bytesUploaded) / parseFloat(state.file.size)) * 100
).toFixed(2);
if (state.progress) state.progress(percentComplete, data, status, headers, config);
uploadFileInBlocks(reader, state);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
});
}
};
uploadFileInBlocks(reader, state);
return {
cancel: function() {
state.cancelled = true;
}
};
};
function cancel() {
stopWatch = {};
state.cancelled = true;
return true;
}
function startStopWatch(handle) {
if (stopWatch[handle] === undefined) {
stopWatch[handle] = {};
stopWatch[handle].start = Date.now();
}
}
function stopStopWatch(handle) {
stopWatch[handle].stop = Date.now();
var duration = stopWatch[handle].stop - stopWatch[handle].start;
delete stopWatch[handle];
return duration;
}
var commitBlockList = function(state) {
var uri = state.fileUrl + '&comp=blocklist';
var requestBody = '<?xml version="1.0" encoding="utf-8"?><BlockList>';
for (var i = 0; i < state.blockIds.length; i++) {
requestBody += '<Latest>' + state.blockIds[i] + '</Latest>';
}
requestBody += '</BlockList>';
$http.put(uri,
requestBody,
{
headers: {
'x-ms-blob-content-type': state.file.type
}
})
.success(function(data, status, headers, config) {
if (state.complete) state.complete(data, status, headers, config);
})
.error(function(data, status, headers, config) {
if (state.error) state.error(data, status, headers, config);
// called asynchronously if an error occurs
// or server returns response with an error status.
});
};
var uploadFileInBlocks = function(reader, state) {
if (!state.cancelled) {
if (state.totalBytesRemaining > 0) {
var fileContent = state.file.slice(state.currentFilePointer,
state.currentFilePointer + state.maxBlockSize);
var blockId = state.blockIdPrefix + stringUtilities.pad(state.blockIds.length, 6);
state.blockIds.push(btoa(blockId));
reader.readAsArrayBuffer(fileContent);
state.currentFilePointer += state.maxBlockSize;
state.totalBytesRemaining -= state.maxBlockSize;
if (state.totalBytesRemaining < state.maxBlockSize) {
state.maxBlockSize = state.totalBytesRemaining;
}
} else {
commitBlockList(state);
}
}
};
return {
upload: upload,
cancel: cancel,
startStopWatch: startStopWatch,
stopStopWatch: stopStopWatch
};
};
})();
Are there any ways I can move the scope of objects to help with Chrome GC? I have seen other people mentioning similar issues but understood Chromium had resolved some.
I should say my solution is heavily based on Gaurav Mantri's blog post here: