I am working on a web app (pure HTML/Javascript, no libraries) that does byte-level processing of a file (Huffman Encoding demo). It works beautifully (you do NOT want to know how long it took to get there), but my sense of completion is bothering me just a bit because I have to load the files to and from an ArrayBuffer instead of streaming from the HDD. There's also a filesize limitation, although it would admittedly take quite a long time to compress a 4GB file (the maximum that my data structures support).
Still, in the interest of making this app work on low-resource devices, how might I stream a file from a file input box (I need multiple passes over it: frequency counting, output-size detection, and the actual write) and stream the result to a browser download of some sort (that part, thankfully, needs only one pass)?
Here are the relevant functions that handle it right now (I apologize for the globals :P):
//Load the file
function startProcessingFile(){
  //Reads the selected file fully into memory, stores it in the global inp as a DataView,
  //and calls boot() once the read has finished. Problems are reported through displayError.
  var picker=document.getElementById("file"); //the file input element (HTML not shown here)
  if (picker.files.length === 0) {
    displayError("No file selected");
    return;
  }
  displayStatus("Loading File...");
  var reader = new FileReader();
  reader.onerror=function () {
    displayError(reader.error);
  };
  reader.onload=function () {
    //Only the first selected file is processed.
    inp = new DataView(reader.result);
    boot();
  };
  reader.readAsArrayBuffer(picker.files[0]);
}
//A bit later on -- one of the functions that reads the data from the input file
function countTypes(c){ //counts the byte frequencies. c is # bytes processed so far.
  //Processes roughly 1% of the input per call, then re-schedules itself with setTimeout so the
  //page stays responsive. When the whole input has been counted it advances system_state and
  //hands control back to taskHandle().
  if (die){
    //An abort was requested: acknowledge it by clearing the flag and stop without rescheduling.
    die=false;
    return;
  }
  var i=Math.ceil(inputSize/100.0); //bytes to process in this slice
  while (c<inputSize && i>0){
    var d=inp.getUint8(c);
    frequencies[d]+=1;
    i--;
    c++;
  }
  //An empty input would give 0/0 = NaN here, so report it as complete instead.
  var perc=inputSize>0 ? 100.0*c/inputSize : 100;
  updateProgress(perc);
  if (c<inputSize){
    setTimeout(function () {countTypes(c);}, 0);
  }
  else{
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}
//Here's where the file is read the last time and also where the bits come from that I want to save. If I could stream the data directly I could probably even get rid of the dry-run stage I currently need to count how many bytes to allocate for the output ArrayBuffer. I mean, Google Drive can download files without telling the browser the size, just whether it's done yet or not, so I'd assume that's a feature I could access here too. I'm just not sure how you actually gain access to a download from JS in the first place.
function encode(c,d){ //performs the Huffman encoding.
  //c is the number of input bytes processed so far. d is passed straight through to writeBit;
  //if d is true (dry run), nothing is actually written.
  //Processes roughly 0.4% of the input per call, then re-schedules itself with setTimeout.
  //When the whole input has been encoded it advances system_state and calls taskHandle().
  //Throws an Error if an input byte has no entry in table.
  if (die){
    //An abort was requested: acknowledge it by clearing the flag and stop without rescheduling.
    die=false;
    return;
  }
  //Build a byte -> code lookup once per slice instead of scanning table for every input byte.
  //Later entries overwrite earlier ones, which matches the old scan (it kept the last match).
  var codes=[];
  for (var k=0; k<table.length; k++){
    if (typeof table[k].value==="number"){
      codes[table[k].value]=table[k].code;
    }
  }
  var i=Math.ceil(inputSize/250.0); //bytes to process in this slice
  while (c<inputSize && i>0){
    var b=inp.getUint8(c);
    var seq=codes[b];
    if (seq===undefined){
      throw new Error("No Huffman code for byte value "+b);
    }
    for (var j=0; j<seq.length; j++){
      writeBit(seq[j],d);
    }
    i--;
    c++;
  }
  //An empty input would give 0/0 = NaN here, so report it as complete instead.
  var perc=inputSize>0 ? 100.0*c/inputSize : 100;
  updateProgress(perc);
  if (c<inputSize){
    setTimeout(function () {encode(c,d);}, 0);
  }
  else{
    updateProgress(100);
    system_state++;
    taskHandle();
  }
}
//Finally, bit-level access for unaligned read/write so I can actually take advantage of the variable word size of the Huffman encoding (the read is used for decoding).
function readBit(){
  //Returns the next bit (0 or 1) from the input DataView and advances the global read position
  //(byte_index/bit_index). Bits are taken least-significant first within each byte.
  //The offset of 4 skips the filesize int at the start of the data.
  var bit=(inp.getUint8(byte_index+4)>>>bit_index)&1;
  if (bit_index>=7){
    //That was the last bit of this byte; move on to the next one.
    bit_index=0;
    byte_index++;
  }else{
    bit_index+=1;
  }
  return bit;
}
function writeBit(b,d){
  //Writes one bit (b) to the output DataView at the current write position, then advances the
  //position (byte_index/bit_index). The write only happens when d is exactly false; for any
  //other d this is a dry run that just advances the position.
  //Bytes are stored at byte_index+4 (presumably leaving room for the filesize int -- confirm).
  if (d===false){
    var cleared=current_byte&(0xff^(1<<bit_index)); //current byte with the target bit zeroed
    current_byte=cleared|(b<<bit_index);
    output.setUint8(byte_index+4, current_byte);
  }
  if (bit_index>=7){
    //That was the last bit of this byte; move on to the next one.
    bit_index=0;
    byte_index++;
  }else{
    bit_index+=1;
  }
}
function readByte(){
  //Reads eight bits with readBit and assembles them into a byte, first bit read = least
  //significant. Works at any bit position (unaligned).
  var value=0;
  var shift=0;
  while (shift<8){
    value|=readBit()<<shift;
    shift++;
  }
  return value;
}
function writeByte(b,d){
  //Writes the eight bits of b using writeBit, least significant bit first. Works at any bit
  //position (unaligned). d is forwarded unchanged to writeBit.
  var shift=0;
  while (shift<8){
    writeBit((b>>>shift)&1,d);
    shift++;
  }
}
//And finally the download mechanism I'm using.
function downloadResult(){//download processed file with specified extension
  //Wraps the global output buffer in a Blob and triggers a browser download through a temporary
  //<a download> element. The download name is the base name of source_name with fext appended
  //(when encoding) or removed (when decoding). Afterwards it clears the running flag and, if the
  //element with id "ac" has the "activeNav" class, calls clearRes().
  var blobObject = new Blob([output], {type: 'application/octet-stream'});
  //Strip any directory part, whichever slash style the path uses.
  var n=source_name.split('\\').pop().split('/').pop();
  if (doEncode){
    n=n+fext;
  }else if (fext.length>0 && n.slice(-fext.length)===fext){
    //Remove the extension from the END of the name; a plain replace() would remove the first
    //occurrence instead (e.g. the wrong one in "a<fext>.data<fext>").
    n=n.slice(0, n.length-fext.length);
  }else{
    //Name does not end with the extension: keep the previous behavior.
    n=n.replace(fext,"");
  }
  var url=URL.createObjectURL(blobObject);
  var a = document.createElement("a");
  a.setAttribute("href", url);
  a.setAttribute("download", n);
  a.click();
  //Release the object URL so the blob is not kept alive for the lifetime of the page. This is
  //deferred with a generous grace period so the browser can start the download first.
  //(The old "delete a;" was dropped: it has no effect on a var and is a SyntaxError in strict mode.)
  setTimeout(function () {URL.revokeObjectURL(url);}, 40000);
  running=false;
  var b=document.getElementById("ac");
  if (b.classList.contains("activeNav")){
    clearRes();
  }
}
I basically want to rip most of that out and replace it with something that can read bytes or medium-ish chunks of data out of the file that the user selects, and then when it gets to the actual output stage, trickle that data byte-by-byte through a more-or-less vanilla download to their download folder.
I do know that multiple files can be selected in a file input box, so perhaps if it's possible to download to a subfolder I could work out how to make an in-browser file archiver for the heck of it. Wouldn't that be fun! ...Mind, I'm fairly sure it's not possible (I don't see why you shouldn't be able to create a subdirectory in the browser downloads folder from the webpage, but there's probably a security reason).
Let me know if you need to see more code. Since this is a class project, though, I'd rather not post the whole thing and risk getting accused of plagiarizing my own app...