0

I am trying to convert this http cache into a normal html page so I can see the content.

Here is the http cache I am trying to convert: http://mediapreset.com/http-cache.log

There is this site that can help with converting http cache page into a normal html page, but their service is not working:

http://www.sensefulsolutions.com/2012/01/viewing-chrome-cache-easy-way.html

They do however provide this function:

(function() {
    var preTags = document.getElementsByTagName('pre');
    var preWithHeaderInfo = preTags[0];
    var preWithContent = preTags[2];

    var lines = preWithContent.textContent.split('\n');

    // get data about the formatting (changes between different versions of chrome)
    var rgx = /^(0{8}:\s+)([0-9a-f]{2}\s+)[0-9a-f]{2}/m;
    var match = rgx.exec(lines[0]);

    var text = '';
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var firstIndex = match[1].length; // first index of the chars to match (e.g. where a '84' would start)
        var indexJump = match[2].length; // how much space is between each set of numbers
        var totalCharsPerLine = 16;
        index = firstIndex;
        for (var j = 0; j < totalCharsPerLine; j++) {
            var hexValAsStr = line.substr(index, 2);
            if (hexValAsStr == '  ') {
                // no more chars
                break;
            }

            var asciiVal = parseInt(hexValAsStr, 16);
            text += String.fromCharCode(asciiVal);

            index += indexJump;
        }
    }

    var headerText = preWithHeaderInfo.textContent;
    var elToInsertBefore = document.body.childNodes[0];
    var insertedDiv = document.createElement("div");
    document.body.insertBefore(insertedDiv, elToInsertBefore);

    // find the filename
    var nodes = [document.body];
    var filepath = '';
    while (true) {
        var node = nodes.pop();
        if (node.hasChildNodes()) {
            var children = node.childNodes;
            for (var i = children.length - 1; i >= 0; i--) {
                nodes.push(children[i]);
            }
        }

        if (node.nodeType === Node.TEXT_NODE && /\S/.test(node.nodeValue)) {
            // 1st depth-first text node (with non-whitespace chars) found
            filepath = node.nodeValue;
            break;
        }
    }

    outputResults(insertedDiv, convertToBase64(text), filepath, headerText);

    insertedDiv.appendChild(document.createElement('hr'));

    function outputResults(parentElement, fileContents, fileUrl, headerText) {
        // last updated 1/27/12
        var rgx = /.+\/([^\/]+)/;
        var filename = rgx.exec(fileUrl)[1];

        // get the content type
        rgx = /content-type: (.+)/i;
        var match = rgx.exec(headerText);
        var contentTypeFound = match != null;
        var contentType = "text/plain";
        if (contentTypeFound) {
            contentType = match[1];
        }

        var dataUri = "data:" + contentType + ";base64," + fileContents;

        // check for gzipped file
        var gZipRgx = /content-encoding: gzip/i;
        if (gZipRgx.test(headerText)) {
            filename += '.gz';
        }

        // check for image
        var imageRgx = /image/i;
        var isImage = imageRgx.test(contentType);

        // create link
        var aTag = document.createElement('a');
        aTag.textContent = "Left-click to download the cached file";
        aTag.setAttribute('href', dataUri);
        aTag.setAttribute('download', filename);
        parentElement.appendChild(aTag);
        parentElement.appendChild(document.createElement('br'));

        // create image
        if (isImage) {
            var imgTag = document.createElement('img');
            imgTag.setAttribute("src", dataUri);
            parentElement.appendChild(imgTag);
            parentElement.appendChild(document.createElement('br'));
        }

        // create warning
        if (!contentTypeFound) {
            var pTag = document.createElement('p');
            pTag.textContent = "WARNING: the type of file was not found in the headers... defaulting to text file.";
            parentElement.appendChild(pTag);
        }
    }

    function getBase64Char(base64Value) {
        if (base64Value < 0) {
            throw "Invalid number: " + base64Value;
        } else if (base64Value <= 25) {
            // A-Z
            return String.fromCharCode(base64Value + "A".charCodeAt(0));
        } else if (base64Value <= 51) {
            // a-z
            base64Value -= 26; // a
            return String.fromCharCode(base64Value + "a".charCodeAt(0));
        } else if (base64Value <= 61) {
            // 0-9
            base64Value -= 52; // 0
            return String.fromCharCode(base64Value + "0".charCodeAt(0));
        } else if (base64Value <= 62) {
            return '+';
        } else if (base64Value <= 63) {
            return '/';
        } else {
            throw "Invalid number: " + base64Value;
        }
    }

    function convertToBase64(input) {
        // http://en.wikipedia.org/wiki/Base64#Example
        var remainingBits;
        var result = "";
        var additionalCharsNeeded = 0;

        var charIndex = -1;
        var charAsciiValue;
        var advanceToNextChar = function() {
            charIndex++;
            charAsciiValue = input.charCodeAt(charIndex);
            return charIndex < input.length;
        };

        while (true) {
            var base64Char;

            // handle 1st char
            if (!advanceToNextChar()) break;
            base64Char = charAsciiValue >>> 2;
            remainingBits = charAsciiValue & 3; // 0000 0011
            result += getBase64Char(base64Char); // 1st char
            additionalCharsNeeded = 3;

            // handle 2nd char
            if (!advanceToNextChar()) break;
            base64Char = (remainingBits << 4) | (charAsciiValue >>> 4);
            remainingBits = charAsciiValue & 15; // 0000 1111
            result += getBase64Char(base64Char); // 2nd char
            additionalCharsNeeded = 2;

            // handle 3rd char
            if (!advanceToNextChar()) break;
            base64Char = (remainingBits << 2) | (charAsciiValue >>> 6);
            result += getBase64Char(base64Char); // 3rd char
            remainingBits = charAsciiValue & 63; // 0011 1111
            result += getBase64Char(remainingBits); // 4th char
            additionalCharsNeeded = 0;
        }

        // there may be an additional 2-3 chars that need to be added
        if (additionalCharsNeeded == 2) {
            remainingBits = remainingBits << 2; // 4 extra bits
            result += getBase64Char(remainingBits) + "=";
        } else if (additionalCharsNeeded == 3) {
            remainingBits = remainingBits << 4; // 2 extra bits
            result += getBase64Char(remainingBits) + "==";
        } else if (additionalCharsNeeded != 0) {
            throw "Unhandled number of additional chars needed: " + additionalCharsNeeded;
        }

        return result;
    }
    })()

How to use the function above to carry out the http cache conversion?

Here is the fiddle I'm working on: https://jsfiddle.net/emporio/9jdw8r68/2/

Marco V
  • 2,553
  • 8
  • 36
  • 58
  • 1
    I think your cache data is invalid. I modified your fiddle: https://jsfiddle.net/9jdw8r68/4/ and got link to download file. But downloaded file seems to be invalid. When i tested it on some other cache data (one PNG file and one HTML file) it worked ok. – ahwayakchih May 20 '16 at 18:30
  • Maybe I should save the file with .html extension? – Marco V May 20 '16 at 18:57
  • I noticed that the info page in chrome://cache regarding specific URL has 3 parts: HTTP headers, something that looks like compressed headers and maybe some additional info and content data as third. That is why original function was checking for 3 `pre` tags. I changed them to `script` tags and made only two. So when you paste cached data, paste only 1st and 3rd part from chrome://cache page. – ahwayakchih May 20 '16 at 19:27
  • Do you have an updated fiddle for me? I need to see the page behind the cache.. – Marco V May 20 '16 at 19:31
  • Just copy and paste data from your chrome cache. As for viewing HTML, i'm not sure this function will be enough. If data is gzipped, you will have to ungzip it first - i did not try, but maybe this will help: http://stackoverflow.com/questions/4875020/javascript-decompress-inflate-unzip-ungzip-strings – ahwayakchih May 20 '16 at 19:41

1 Answers1

1

Here's a modified version that outputs ungzipped data to console:

(function convertCache() {
    var preTags = document.querySelectorAll('script[type="http-cache"]');
    var preWithHeaderInfo = preTags[0];
    var preWithContent = preTags[1];

    var lines = preWithContent.textContent.split('\n');

    // get data about the formatting (changes between different versions of chrome)
    var rgx = /^(0{8}:\s+)([0-9a-f]{2}\s+)[0-9a-f]{2}/m;
    var match = rgx.exec(lines[0]);

    var text = '';
    var data = [];
    for (var i = 0; i < lines.length; i++) {
        var line = lines[i];
        var firstIndex = match[1].length; // first index of the chars to match (e.g. where a '84' would start)
        var indexJump = match[2].length; // how much space is between each set of numbers
        var totalCharsPerLine = 16;
        index = firstIndex;
        for (var j = 0; j < totalCharsPerLine; j++) {
            var hexValAsStr = line.substr(index, 2);
            if (hexValAsStr == '  ') {
                // no more chars
                break;
            }

            var asciiVal = parseInt(hexValAsStr, 16);
            data.push(asciiVal);
            text += String.fromCharCode(asciiVal);

            index += indexJump;
        }
    }

    var headerText = preWithHeaderInfo.textContent;
    var elToInsertBefore = document.body.childNodes[0];
    var insertedDiv = document.createElement("div");
    document.body.insertBefore(insertedDiv, elToInsertBefore);

    // find the filename
    var nodes = [document.body];
    var filepath = '';
    while (true) {
        var node = nodes.pop();
        if (node.hasChildNodes()) {
            var children = node.childNodes;
            for (var i = children.length - 1; i >= 0; i--) {
                nodes.push(children[i]);
            }
        }

        if (node.nodeType === Node.TEXT_NODE && /\S/.test(node.nodeValue)) {
            // 1st depth-first text node (with non-whitespace chars) found
            filepath = node.nodeValue;
            break;
        }
    }

    var gZipRgx = /content-encoding: gzip/i;
    if (gZipRgx.test(headerText)) {
        var binData = new Uint8Array(data);
        var data = pako.ungzip(binData);
        var strData = String.fromCharCode.apply(null, new Uint8Array(data));
        console.log(strData);
    }

    outputResults(insertedDiv, convertToBase64(text), filepath, headerText);

    insertedDiv.appendChild(document.createElement('hr'));

    function outputResults(parentElement, fileContents, fileUrl, headerText) {
        // last updated 1/27/12
        var rgx = /.+\/([^\/]+)/;
        var filename = rgx.exec(fileUrl)[1];

        // get the content type
        rgx = /content-type: (.+)/i;
        var match = rgx.exec(headerText);
        var contentTypeFound = match != null;
        var contentType = "text/plain";
        if (contentTypeFound) {
            contentType = match[1];
        }

        var dataUri = "data:" + contentType + ";base64," + fileContents;

        // check for gzipped file
        var gZipRgx = /content-encoding: gzip/i;
        if (gZipRgx.test(headerText)) {
            filename += '.gz';
        }

        // check for image
        var imageRgx = /image/i;
        var isImage = imageRgx.test(contentType);

        // create link
        var aTag = document.createElement('a');
        aTag.textContent = "Left-click to download the cached file";
        aTag.setAttribute('href', dataUri);
        aTag.setAttribute('download', filename);
        parentElement.appendChild(aTag);
        parentElement.appendChild(document.createElement('br'));

        // create image
        if (isImage) {
            var imgTag = document.createElement('img');
            imgTag.setAttribute("src", dataUri);
            parentElement.appendChild(imgTag);
            parentElement.appendChild(document.createElement('br'));
        }

        // create warning
        if (!contentTypeFound) {
            var pTag = document.createElement('p');
            pTag.textContent = "WARNING: the type of file was not found in the headers... defaulting to text file.";
            parentElement.appendChild(pTag);
        }
    }

    function getBase64Char(base64Value) {
        if (base64Value < 0) {
            throw "Invalid number: " + base64Value;
        } else if (base64Value <= 25) {
            // A-Z
            return String.fromCharCode(base64Value + "A".charCodeAt(0));
        } else if (base64Value <= 51) {
            // a-z
            base64Value -= 26; // a
            return String.fromCharCode(base64Value + "a".charCodeAt(0));
        } else if (base64Value <= 61) {
            // 0-9
            base64Value -= 52; // 0
            return String.fromCharCode(base64Value + "0".charCodeAt(0));
        } else if (base64Value <= 62) {
            return '+';
        } else if (base64Value <= 63) {
            return '/';
        } else {
            throw "Invalid number: " + base64Value;
        }
    }

    function convertToBase64(input) {
        // http://en.wikipedia.org/wiki/Base64#Example
        var remainingBits;
        var result = "";
        var additionalCharsNeeded = 0;

        var charIndex = -1;
        var charAsciiValue;
        var advanceToNextChar = function() {
            charIndex++;
            charAsciiValue = input.charCodeAt(charIndex);
            return charIndex < input.length;
        };

        while (true) {
            var base64Char;

            // handle 1st char
            if (!advanceToNextChar()) break;
            base64Char = charAsciiValue >>> 2;
            remainingBits = charAsciiValue & 3; // 0000 0011
            result += getBase64Char(base64Char); // 1st char
            additionalCharsNeeded = 3;

            // handle 2nd char
            if (!advanceToNextChar()) break;
            base64Char = (remainingBits << 4) | (charAsciiValue >>> 4);
            remainingBits = charAsciiValue & 15; // 0000 1111
            result += getBase64Char(base64Char); // 2nd char
            additionalCharsNeeded = 2;

            // handle 3rd char
            if (!advanceToNextChar()) break;
            base64Char = (remainingBits << 2) | (charAsciiValue >>> 6);
            result += getBase64Char(base64Char); // 3rd char
            remainingBits = charAsciiValue & 63; // 0011 1111
            result += getBase64Char(remainingBits); // 4th char
            additionalCharsNeeded = 0;
        }

        // there may be an additional 2-3 chars that need to be added
        if (additionalCharsNeeded == 2) {
            remainingBits = remainingBits << 2; // 4 extra bits
            result += getBase64Char(remainingBits) + "=";
        } else if (additionalCharsNeeded == 3) {
            remainingBits = remainingBits << 4; // 2 extra bits
            result += getBase64Char(remainingBits) + "==";
        } else if (additionalCharsNeeded != 0) {
            throw "Unhandled number of additional chars needed: " + additionalCharsNeeded;
        }

        return result;
    }
})()

https://jsfiddle.net/9jdw8r68/6/

Data from chrome://cache info has to be split into two separate script tags (each with type="http-cache" attribute). When you open cache of specific URL and check page source code, you will see three pre tags there.

  1. copy content of first pre into first script type="http-cache" tag
  2. ignore second pre tag
  3. copy content of third pre into second script type="http-cache" tag
  4. ignore the rest

Unfortunately it throws if data is too big. You would have to find different implementation of gzip or maybe put it into webworker to workaround that.

ahwayakchih
  • 2,101
  • 19
  • 24