0

I have a Google Sheet that contains mis-encoded text such as `dÃ©fi` (should be défi) or `Ã–sterreich` (should be Österreich).

I can decode the first one with this script, but I am not able to decode the second one (I mean the case where the character code takes 3 bytes).

Thanks for any help!

/**
 * Repairs text in which every byte of a UTF-8 sequence was stored as its own
 * character (for example "d\u00C3\u00A9fi" instead of "d\u00E9fi").
 *
 * The string is scanned left to right. A character whose code is a UTF-8 lead
 * byte is decoded together with the 1, 2 or 3 characters that follow it.
 * Anything that does not form a valid UTF-8 sequence (plain ASCII, characters
 * above 255, stray or truncated sequences) is copied to the output unchanged,
 * so text that is already correct is not damaged.
 *
 * @param {string} txt - Text whose characters 128-255 stand for raw UTF-8 bytes.
 * @returns {string} The text with every valid UTF-8 sequence decoded.
 */
function decode(txt) {
  var texte = [];
  var i = 0;
  while (i < txt.length) {
    var n = txt.charCodeAt(i);

    // Length of the sequence announced by the lead byte (0 = not a lead byte).
    var size = 0;
    if (n >= 0xC2 && n <= 0xDF) {
      size = 2; // 110xxxxx
    } else if (n >= 0xE0 && n <= 0xEF) {
      size = 3; // 1110xxxx
    } else if (n >= 0xF0 && n <= 0xF4) {
      size = 4; // 11110xxx
    }

    var decoded = null;
    if (size > 0 && i + size <= txt.length) {
      try {
        // escape() writes each character 128-255 as %XX, which
        // decodeURIComponent() then reads back as UTF-8 bytes.
        decoded = decodeURIComponent(escape(txt.substring(i, i + size)));
      } catch (err) {
        // URIError means the characters are not a valid UTF-8 sequence:
        // fall through and keep the current character as it is.
        if (!(err instanceof URIError)) {
          throw err;
        }
      }
    }

    if (decoded === null) {
      texte.push(txt.charAt(i));
      i += 1;
    } else {
      texte.push(decoded);
      i += size;
    }
  }
  return texte.join('');
}
/**
 * Decodes a string whose characters each stand for one UTF-8 byte.
 *
 * @param {string} s - Text in which characters 128-255 represent raw bytes.
 * @returns {string} The decoded text.
 * @throws {URIError} When the bytes do not form valid UTF-8.
 */
function decode_utf8(s) {
  // escape() rewrites each character 128-255 as %XX ...
  var percentEncoded = escape(s);
  // ... and decodeURIComponent() interprets those %XX runs as UTF-8.
  return decodeURIComponent(percentEncoded);
}
TheMaster
  • 45,448
  • 6
  • 62
  • 85
Mike Steelson
  • 14,650
  • 2
  • 5
  • 20
  • 1
    https://stackoverflow.com/questions/7885096/how-do-i-decode-a-string-with-escaped-unicode – Cooper Dec 10 '21 at 17:43
  • 1
    https://stackoverflow.com/questions/15929686/how-to-decode-unicode-html-by-javascript/15929722 – Cooper Dec 10 '21 at 17:45
  • 1
    Could you provide expected output for the input? – TheMaster Dec 10 '21 at 21:00
  • @Cooper: the problem remains: how can I know the \uxxxx for `Ö` in advance, since I have no way of knowing that this character, coded on 3 bytes (226|128|147), is Ö? / @TheMaster: as I mentioned ... `dÃ©fi = défi` and `Ã–sterreich = Österreich` – Mike Steelson Dec 11 '21 at 00:46
  • I did some research to try to assist you but in reality I know very little about this sort of thing. It's something I'd like to know more about but as yet I have not found a pressing need. – Cooper Dec 11 '21 at 03:36
  • Thank you very much Cooper for all your efforts! After a night of work and research I finally solved my problem. – Mike Steelson Dec 11 '21 at 04:27

1 Answers1

0

Here is a solution, based on code found on GitHub:

/**
 * Decodes a string in which each character holds one UTF-8 byte (code 0-255)
 * into the text those bytes represent.
 *
 * Supports 1- to 4-byte sequences; code points above U+FFFF are emitted as a
 * UTF-16 surrogate pair. A character that does not start a complete, valid
 * sequence (stray continuation byte, truncated sequence, character above 255)
 * is copied to the output unchanged instead of being merged into garbage.
 *
 * @param {string} utftext - Text whose characters stand for raw UTF-8 bytes.
 * @returns {string} The decoded text.
 */
function utf8decode(utftext) {
  var string = "";
  var i = 0;
  var length = utftext.length;

  // Returns the 6 payload bits of the continuation byte (10xxxxxx) at `index`,
  // or -1 when there is no such byte (end of input or any other character).
  function continuation(index) {
    var b = utftext.charCodeAt(index); // NaN past the end of the string
    return (b & 0xC0) === 0x80 && b <= 0xFF ? (b & 0x3F) : -1;
  }

  while (i < length) {
    var c = utftext.charCodeAt(i);
    // Default: keep the current character as is and advance by one.
    var codePoint = c;
    var size = 1;
    var c2;
    var c3;
    var c4;

    if (c > 191 && c < 224) {
      // 110xxxxx 10xxxxxx
      c2 = continuation(i + 1);
      if (c2 >= 0) {
        codePoint = ((c & 31) << 6) | c2;
        size = 2;
      }
    } else if (c >= 224 && c < 240) {
      // 1110xxxx 10xxxxxx 10xxxxxx
      c2 = continuation(i + 1);
      c3 = continuation(i + 2);
      if (c2 >= 0 && c3 >= 0) {
        codePoint = ((c & 15) << 12) | (c2 << 6) | c3;
        size = 3;
      }
    } else if (c >= 240 && c < 245) {
      // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      c2 = continuation(i + 1);
      c3 = continuation(i + 2);
      c4 = continuation(i + 3);
      if (c2 >= 0 && c3 >= 0 && c4 >= 0) {
        var candidate = ((c & 7) << 18) | (c2 << 12) | (c3 << 6) | c4;
        // Only accept values that really need four bytes and are valid Unicode.
        if (candidate >= 0x10000 && candidate <= 0x10FFFF) {
          codePoint = candidate;
          size = 4;
        }
      }
    }

    if (codePoint > 0xFFFF) {
      // Outside the BMP: encode as a high/low surrogate pair.
      var offset = codePoint - 0x10000;
      string += String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    } else {
      string += String.fromCharCode(codePoint);
    }
    i += size;
  }
  return string;
}
Mike Steelson
  • 14,650
  • 2
  • 5
  • 20