0

I have an array of array of numbers as data:

[[9,593,82,593,1360,593,82,582,1344,4676,1344,593,82,593,82,578,1344,593,82,577,1344,593,82,567,1328,593,82,4662,1328,593,82,4662,1328,593,82,577,1344,593,82,4674,1344,582,1344,594,1360,4693,1360,599,1360,4705,1376,582,1344,581,1344,581,1344,4677,1344,4678,1344,593,1360,4690,1360],
[9,593,82,593,1360,593,82,582,1344,4676,1344,593,82,593,82,578,1344,593,82,577,1344,593,82,567,1328,593,82,4662,1328,593,82,4662,1328,593,82,577,1344,593,82,4674,1344,582,1344,594,1360,4693,1360,599,1360,4705,1376,582,1344,581,1344,581,1344,4677,1344,4678,1344,593,1360,4690,1360]]

I can import this data into my app and load the state.

So I want to encode this into a URL so it's easier to share.

What is the most size-efficient way to encode this data into a URL?

Also acceptable is any reliable third-party service that I can save my state to and load it from, like a GitHub gist or something similar.

eguneys
  • 6,028
  • 7
  • 31
  • 63
  • data can get bigger than this, so that's why I am concerned. – eguneys Jul 24 '22 at 22:46
  • Does this answer your question? [How to encode array elements in javascript?](https://stackoverflow.com/questions/62157586/how-to-encode-array-elements-in-javascript) – pilchard Jul 24 '22 at 22:51
  • 2
    but remember there is an upper bound of url length. see:[What is the maximum length of a URL in different browsers?](https://stackoverflow.com/questions/417142/what-is-the-maximum-length-of-a-url-in-different-browsers) – pilchard Jul 24 '22 at 22:51
  • Limit: 2048 characters. Maybe base64 or something more efficient would do, since it's only numbers. – IT goldman Jul 24 '22 at 22:51
  • So I thought since these are essentially just numbers, maybe there is some encoding that will generate a compressed chunk of url blob. – eguneys Jul 24 '22 at 22:52
  • Also the numbers are basically integers – eguneys Jul 24 '22 at 22:53
  • 2
    I would recommend https://github.com/pieroxy/lz-string - it has a "compressToBase64" method which will compress and convert to base64 in one step - the only thing you probably should do then is convert the base64 to "url64" ... which is URL-safe base64 – Jaromanda X Jul 24 '22 at 23:47
  • It also has `compressToEncodedURIComponent` which is exactly what I needed, if you post that as an answer I will accept. – eguneys Jul 25 '22 at 00:32

1 Answers1

0

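OK, so base64 is not a compression scheme. But I just (re)invented a compression method, and base64 is part of it. It compresses the data to about 65% of its original size. The idea is to map every character of the JSON text (the digits plus `[`, `]`, `,`, space, `.` and `-`) to a single hex digit, so that each pair of characters becomes a two-digit hex number, which is one ASCII char. The result is then base64-encoded for safety.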

// Sample payload for the demo: one short row with decimal and negative
// values, followed by two identical rows of integers.
var data = (function () {
  var integerRow = [9, 593, 82, 593, 1360, 593, 82, 582, 1344, 4676, 1344, 593, 82, 593, 82, 578, 1344, 593, 82, 577, 1344, 593, 82, 567, 1328, 593, 82, 4662, 1328, 593, 82, 4662, 1328, 593, 82, 577, 1344, 593, 82, 4674, 1344, 582, 1344, 594, 1360, 4693, 1360, 599, 1360, 4705, 1376, 582, 1344, 581, 1344, 581, 1344, 4677, 1344, 4678, 1344, 593, 1360, 4690, 1360];

  return [
    [0.3, 2.1, -0.04, 32, 0, 54],
    integerRow,
    // Independent copy so the two integer rows stay distinct arrays.
    integerRow.slice(),
  ];
})();


/**
 * Maps one character of the text being packed to a single hexadecimal digit.
 *
 * Decimal digits map to themselves; the six supported symbols take the
 * remaining hex digits: '[' -> a, ']' -> b, ',' -> c, ' ' -> d, '.' -> e,
 * '-' -> f.
 *
 * @param {string} c - A single character (coerced to a string).
 * @returns {string} The hex digit ('0'-'9' or 'a'-'f') standing for `c`.
 * @throws {RangeError} If `c` is not one of the 16 supported characters.
 *   Without this check a letter such as 'e' (from "1e-7") would pass through
 *   and later decode as '.', and any other character would make the caller's
 *   parseInt() yield NaN, silently corrupting the output.
 */
function encode_char(c) {
  var ch = "" + c;
  switch (ch) {
    case '[': return 'a';
    case ']': return 'b';
    case ',': return 'c';
    case ' ': return 'd';
    case '.': return 'e';
    case '-': return 'f';
  }
  // Only a single decimal digit may pass through unchanged.
  if (ch.length === 1 && ch >= '0' && ch <= '9') {
    return ch;
  }
  throw new RangeError(
    "encode_char: unsupported character " + JSON.stringify(ch) +
    " (only digits and [ ] , space . - can be encoded)"
  );
}


/**
 * Inverse of the symbol mapping used when packing: turns one hex digit back
 * into the character it stands for.
 *
 * 'a' -> '[', 'b' -> ']', 'c' -> ',', 'd' -> ' ', 'e' -> '.', 'f' -> '-'.
 * Anything else is returned as a string, unchanged.
 *
 * @param {string} c - A single hex digit.
 * @returns {string} The decoded character.
 */
function decode_char(c) {
  var nibble = "" + c;
  switch (nibble) {
    case 'a':
      return '[';
    case 'b':
      return ']';
    case 'c':
      return ',';
    case 'd':
      return ' ';
    case 'e':
      return '.';
    case 'f':
      return '-';
    default:
      return nibble;
  }
}

/**
 * Packs a string two characters at a time: each character is turned into a
 * hex digit by encode_char(), and every pair of hex digits becomes one
 * character whose char code is that two-digit hex value.
 *
 * @param {string} str - Text to pack.
 * @returns {string} Packed string, one character per pair of input characters.
 */
function encode_string(str) {
  var symbols = str.split("");

  // An odd-length input gets a trailing space so every character has a partner.
  if (symbols.length % 2 !== 0) {
    symbols.push(" ");
  }

  var packed = "";
  var pos = 0;
  while (pos < symbols.length) {
    var high = encode_char(symbols[pos]);
    var low = encode_char(symbols[pos + 1]);
    var byteValue = parseInt("" + high + low, 16);
    packed += String.fromCharCode(byteValue);
    pos += 2;
  }
  return packed;
}

/**
 * Unpacks a string produced by encode_string(): each character's char code is
 * written as two hex digits, and each digit is mapped back to its original
 * character with decode_char().
 *
 * @param {string} str - Packed string.
 * @returns {string} The unpacked text with surrounding whitespace trimmed
 *   (which drops the space encode_string() appends to odd-length input).
 */
function decode_string(str) {
  var pieces = str.split("").map(function (packedChar) {
    var hexPair = packedChar.charCodeAt(0).toString(16);
    // Codes below 0x10 print as a single digit; restore the leading zero.
    if (hexPair.length < 2) {
      hexPair = "0" + hexPair;
    }
    var first = decode_char(hexPair.charAt(0));
    var second = decode_char(hexPair.charAt(1));
    return "" + first + second;
  });
  return pieces.join("").trim();
}

/**
 * Full encoding pipeline: pack the text with encode_string(), base64-encode
 * the packed string with btoa(), then percent-encode the base64 text with
 * encodeURIComponent().
 *
 * @param {string} str - Text to encode (the demo passes JSON text).
 * @returns {string} The encoded string.
 */
function encode_integers(str) {
  return encodeURIComponent(btoa(encode_string(str)));
}

/**
 * Reverses encode_integers(): percent-decode with decodeURIComponent(),
 * base64-decode with atob(), then unpack with decode_string().
 *
 * @param {string} str - String produced by encode_integers().
 * @returns {string} The decoded text.
 */
function decode_integers(str) {
  return decode_string(atob(decodeURIComponent(str)));
}

// Round-trip demo: serialize the sample data, encode it, decode it again,
// and log the length and content at every stage.
var json = JSON.stringify(data);
console.log(json.length, json);

var str = encode_integers(json);
console.log(str.length, str);

// `json` now holds the decoded text; the ratio below is measured against it.
json = decode_integers(str);
console.log(json.length, json);

console.log(
  "compression ratio",
  (str.length / json.length).toFixed(2)
);

UPDATE: Let's compare to lz-string.js (48% compression)

// Same sample payload as the hand-rolled demo: one short row with decimal and
// negative values, followed by two identical rows of integers.
var data = (function () {
  var integerRow = [9, 593, 82, 593, 1360, 593, 82, 582, 1344, 4676, 1344, 593, 82, 593, 82, 578, 1344, 593, 82, 577, 1344, 593, 82, 567, 1328, 593, 82, 4662, 1328, 593, 82, 4662, 1328, 593, 82, 577, 1344, 593, 82, 4674, 1344, 582, 1344, 594, 1360, 4693, 1360, 599, 1360, 4705, 1376, 582, 1344, 581, 1344, 581, 1344, 4677, 1344, 4678, 1344, 593, 1360, 4690, 1360];

  return [
    [0.3, 2.1, -0.04, 32, 0, 54],
    integerRow,
    // Independent copy so the two integer rows stay distinct arrays.
    integerRow.slice(),
  ];
})();


// Round-trip through lz-string's URI-component helpers, logging the length
// and content at every stage.
var json = JSON.stringify(data);
console.log(json.length, json);

var str = LZString.compressToEncodedURIComponent(json);
console.log(str.length, str);

// `json` now holds the decompressed text; the ratio is measured against it.
json = LZString.decompressFromEncodedURIComponent(str);
console.log(json.length, json);

console.log(
  "compression ratio",
  (str.length / json.length).toFixed(2)
);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lz-string/1.4.4/lz-string.min.js" integrity="sha512-qoCTmFwBtCPvFhA+WAqatSOrghwpDhFHxwAGh+cppWonXbHA09nG1z5zi4/NGnp8dUhXiVrzA6EnKgJA+fyrpw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
IT goldman
  • 14,885
  • 2
  • 14
  • 28
  • it almost works too .... almost - 7 x `0`'s seem to have dropped off the output :p - many (but not all) `1360` become `136`, then there are a couple more anomalies - it's always a `0` is lost though – Jaromanda X Jul 24 '22 at 23:58
  • Fixed the zero bug. It was when hex was 1 digit. Now it's perfect. Also support for negative and decimal point are being added – IT goldman Jul 25 '22 at 00:10
  • 1
    nice home-spun answer - though, personally, I'd always opt for a library where other people have done the heavy lifting – Jaromanda X Jul 25 '22 at 00:30