As Evert was saying, I should have checked first for the build instructions in the documentation https://stuk.github.io/jszip/documentation/contributing.html.
From that it is clear, first one needs git and makes a local clone. Then one needs to set up the grunt command line, which requires, npm, which comes with nodejs. Once grunt runs, there are other dependencies that need to be npm install-ed. It's the usual little things off and not working, but enough Googling and brute force retrying to get it done.
Now jszip/lib/index.js contains the resource that is finally exported. It is that JSZip object. So just to play with the internal stuff, I could add these to the JSZip object, for example, it already contains:
JSZip.external = require("./external");
module.exports = JSZip;
and so we can easily add other resources we want to play with:
JSZip.flate = require("./flate");
JSZip.DataWorker = require('./stream/DataWorker');
JSZip.DataLengthProbe = require('./stream/DataLengthProbe');
JSZip.Crc32Probe = require('./stream/Crc32Probe');
JSZip.StreamHelper = require('./stream/StreamHelper');
JSZip.pako = require("pako");
Now with that, I can create a proof of concept in the Chrome debugger:
(new JSZip.StreamHelper(
(new JSZip.DataWorker(Promise.resolve("Hello World! Hello World! Hello World! Hello World! Hello World! Hello World!")))
.pipe(new JSZip.DataLengthProbe("uncompressedSize"))
.pipe(new JSZip.Crc32Probe())
.pipe(JSZip.flate.compressWorker({}))
.pipe(new JSZip.DataLengthProbe("compressedSize"))
.on("end", function(event) { console.log("onEnd: ", this.streamInfo) }),
"uint8array", "")
).accumulate(function(data) { console.log("acc: ", data); })
.then(function(data) { console.log("then: ", data); })
and this works. I have been making myself a GZipFileStream with gzip header and trailer, creating everything correctly. I put a jszip/lib/generate/GZipFileWorker.js in as follows:
'use strict';
var external = require('../external');
var utils = require('../utils');
var flate = require('../flate');
var GenericWorker = require('../stream/GenericWorker');
var DataWorker = require('../stream/DataWorker');
var StreamHelper = require('../stream/StreamHelper');
var DataLengthProbe = require('../stream/DataLengthProbe');
var Crc32Probe = require('../stream/Crc32Probe');
function GZipFileWorker() {
GenericWorker.call(this, "GZipFileWorker");
this.virgin = true;
}
utils.inherits(GZipFileWorker, GenericWorker);
GZipFileWorker.prototype.processChunk = function(chunk) {
if(this.virgin) {
this.virgin = false;
var headerBuffer = new ArrayBuffer(10);
var headerView = new DataView(headerBuffer);
headerView.setUint16(0, 0x8b1f, true); // GZip magic
headerView.setUint8(2, 0x08); // compression algorithm DEFLATE
headerView.setUint8(3, 0x00); // flags
// bit 0 FTEXT
// bit 1 FHCRC
// bit 2 FEXTRA
// bit 3 FNAME
// bit 4 FCOMMENT
headerView.setUint32(4, (new Date()).getTime()/1000>>>0, true);
headerView.setUint8(8, 0x00); // no extension headers
headerView.setUint8(9, 0x03); // OS type UNIX
this.push({data: new Uint8Array(headerBuffer)});
}
this.push(chunk);
};
GZipFileWorker.prototype.flush = function() {
var trailerBuffer = new ArrayBuffer(8);
var trailerView = new DataView(trailerBuffer);
trailerView.setUint32(0, this.streamInfo["crc32"]>>>0, true);
trailerView.setUint32(4, this.streamInfo["originalSize"]>>>0 & 0xffffffff, true);
this.push({data: new Uint8Array(trailerBuffer)});
};
exports.gzip = function(data, inputFormat, outputFormat, compressionOptions, onUpdate) {
var mimeType = data.contentType || data.mimeType || "";
if(! (data instanceof GenericWorker)) {
inputFormat = (inputFormat || "").toLowerCase();
data = new DataWorker(
utils.prepareContent(data.name || "gzip source",
data,
inputFormat !== "string",
inputFormat === "binarystring",
inputFormat === "base64"));
}
return new StreamHelper(
data
.pipe(new DataLengthProbe("originalSize"))
.pipe(new Crc32Probe())
.pipe(flate.compressWorker( compressionOptions || {} ))
.pipe(new GZipFileWorker()),
outputFormat.toLowerCase(), mimeType).accumulate(onUpdate);
};
and in jszip/lib/index.js I need just this:
var gzip = require("./generate/GZipFileWorker");
JSZip.gzip = gzip.gzip;
and this works like that:
JSZip.gzip("Hello World! Hello World! Hello World! Hello World! Hello World! Hello World!", "string", "base64", {level: 3}).then(function(result) { console.log(result); })
I can paste the result into a UNIX pipe like this:
$ echo -n "H4sIAOyR/VsAA/NIzcnJVwjPL8pJUVTwoJADAPCORolNAAAA" |base64 -d |zcat
and it correctly returns
Hello World! Hello World! Hello World! Hello World! Hello World! Hello World!
It can also be used with files:
JSZip.gzip(file, "", "Blob").then(function(blob) {
xhr.setRequestProperty("Content-encoding", "gzip");
xhr.send(blob);
})
and I can send the blob to my web server. I have checked that indeed the large file is processed in chunks.
The only thing I don't like about this is that the final blob is still assembled as one big Blob, so I am assuming it holds all compressed data in memory. It would be better if that Blow was an end-point of that Worker pipeline so that when the xhr.send grabs the data chunk-wise from the Blob, it would consume chunks from the Worker pipeline only then. However, the impact is lessened a lot given that it only holds compressed content, and likely (for me at least) large files would be multi-media files that won't need to be gzip compressed anyway.
I did not write a gunzip function, because frankly, I don't need one and I don't want to make one that fails to properly parse extension headers in the gzip headers. As soon as I have uploaded compressed content to the server (S3 in my case), when I'm fetching it again I assume the browser would do the decompressing for me. I haven't checked that though. If it's becoming a problem I'll come back end edit this answer more.
Here is my fork on github: https://github.com/gschadow/jszip, pull request already entered.