1

I've been experimenting with the File System Access API recently to try to allow downloads of large directories from my site, where a page will start a write stream, start fetching the files, consolidate them into a zip file using JSZip, and save the zip file to the disk. I'll leave the download stream running for an hour or two and then come back, and the browser always chokes around 20–25 GB without fail.

JSZip's generateInternalStream method seems to stop writing data to the disk, the zip.crswap file will just remain the same size, and the console will start throwing opaque errors like DOMException: The requested file could not be read, typically due to permission problems that have occurred after a reference to a file was acquired. Here's what my code looks like:


  /**
   * Streams every file under `folderpath` (within `database`) into a single
   * zip archive written to disk via the File System Access API.
   *
   * Flow: prompt the user for a save location, register one pending Promise
   * per file with JSZip, start JSZip's internal generation stream, pump its
   * output chunks to the writable file stream, and download the source files
   * one at a time, resolving each placeholder Promise as its blob arrives.
   *
   * @param {*} database   - root data structure navigated by `goTo` (project helper; shape not visible here)
   * @param {string} folderpath - slash-separated path of the folder to zip
   */
  function batchDownload(database,folderpath){
    
    // Normalize the path: strip one trailing slash...
    if(folderpath.endsWith("/")){
      folderpath = folderpath.slice(0,-1);
    }
    
    // ...and one leading slash.
    if(folderpath.startsWith("/")){
      folderpath = folderpath.slice(1);
    }

    // Last path segment becomes the suggested name for the zip file.
    var folderName = folderpath.split(/\//).pop();
    var folder = goTo(database,folderpath);       // resolve the folder node (project helper)
    var filepaths = getFilepaths(folder);         // enumerate relative file paths (project helper)
    
    var options = {
      suggestedName : folderName + ".zip",
    };

    // Ask the user where to save, then open a writable stream to that file.
    showSaveFilePicker(options).then(handle=>{      
      handle.createWritable().then(writer=>{
        
        var zip = new JSZip();

        // One [filepath, resolve] pair per file. Each zip entry is backed by
        // a pending Promise so JSZip pulls the data only when it is resolved
        // later by the sequential download loop below.
        var resolvers = [];
      
        for(let filepath of filepaths){
          zip.file( filepath, new Promise((resolve,reject)=>{
            resolvers.push([filepath,resolve]);
          }));
        }

        var totalBytesWritten = 0;

        // Concatenates an array of Uint8Arrays into one contiguous Uint8Array.
        function mergeUi8s(ui8s){
          var length = 0;
          ui8s.forEach(item => {
            length += item.length;
          });
          var mergedUi8 = new Uint8Array(length);
          var offset = 0;
          ui8s.forEach(item => {
            mergedUi8.set(item, offset);
            offset += item.length;
          });
          return mergedUi8;
        }

        // Chunks emitted by JSZip accumulate here until the pump writes them.
        var queue = [];
        var finished = false;

        // JSZip streaming generation; zip bytes arrive via "data" events.
        // NOTE(review): .resume() starts the stream immediately; the
        // stream.pause() in the download loop below runs later, so the
        // initial paused/running state depends on scheduling order — confirm
        // this is intended.
        var stream = zip.generateInternalStream({
          type : "uint8array",
          streamFiles : true,
        }).on("data",(data,_metadata)=>{
          queue.push(data);
          totalBytesWritten += data.length;
        }).on("error",(e)=>{
          console.error(e);
          // NOTE(review): setting finished only stops the pump; the writer is
          // never aborted or closed on this path, leaving the .crswap behind.
          finished = true;
        }).on("end",()=>{
          finished = true;
        }).resume();

        // Self-scheduling write pump: merge all queued chunks into one buffer,
        // write it, then re-run. Terminates when JSZip has ended AND the
        // queue is drained (the empty first branch).
        !function pump(){
          if(finished && !queue.length){
          }else{
            if(queue.length){
              var superUi8 = mergeUi8s(queue);
              queue = [];
              // NOTE(review): on a rejected write this logs but does NOT
              // reschedule pump, so the pump silently dies on the first write
              // failure while JSZip keeps filling `queue` — a plausible
              // contributor to the observed stall.
              writer.write(superUi8).then(pump).catch(e=>console.error(e));
            }else{
              // Nothing queued yet — poll again on the next macrotask.
              setTimeout(pump);
            }
          }
        }();
  
        // Download files one at a time (project helper appears to iterate
        // sequentially, calling next() to advance — confirm its contract),
        // resolving each JSZip placeholder Promise with the fetched blob.
        resolvers.forEachAsyncSingleThreaded((e,n,a,next)=>{
          stream.pause();
          var filepath  = e[0];
          var resolve   = e[1];
          batchChunkDownload(goTo( folder, filepath ),blob =>{
            stream.resume();
            resolve(blob);
            /* wait till queue is empty, then start next req */
            // Heuristic drain detection: poll every 20 ms and only proceed
            // after the queue has been empty for >100 consecutive ticks
            // (~2 s of sustained emptiness).
            var amountOfTimesQueueWasLengthZero = 0;
            var waitUntilZeroForALongTime = setInterval(()=>{
              if(queue.length !== 0){
                amountOfTimesQueueWasLengthZero = 0;
              }else{
                amountOfTimesQueueWasLengthZero ++;
              }
              if(amountOfTimesQueueWasLengthZero > 100){
                clearInterval(waitUntilZeroForALongTime);
                next();
              }else{
                console.log("waiting for queue to empty, queue length :"+queue.length);
              }
            },20);
          },p=>{
            // Progress callback: p is presumably a 0..1 fraction — TODO confirm
            // against batchChunkDownload's contract.
            console.log(filepath+" downloading..."+ ~~(p*100)+"%");
            var bytesWrittenSpan = document.querySelector("span.bytes-written");
            if(bytesWrittenSpan){
              bytesWrittenSpan.textContent = humanReadableByteLengthRounded(totalBytesWritten);
            }
            var taskProgress = document.querySelector("progress.task");
            if(taskProgress){
              taskProgress.value = (~~(p*100))/100;
            }
          })
        },()=>{
          console.warn("SHOULD BE DONE????");
          // NOTE(review): close() can run while the pump still has a write in
          // flight — there is no handshake with the pump before closing, which
          // can surface as the "file could not be read" DOMException.
          writer.close();
        });

      }).catch(e=>console.error(e));
    }).catch(e=>console.error(e));
    
  }

Anyway, the strange part is that this isn't specific to the File System Access API — I was hitting the same limit when using StreamSaver.js as well.

Is ~20 GB the hard limit of a file size that JSZip is designed to handle or is this something with the browser?

roachie
  • 31
  • 3

0 Answers