0

I have a WebSocket that delivers text strings to my page from different services. Some of these strings contain strange characters.

Sometimes i receive strings like:

"stuff stuff %91quoted stuff%92 stuff"

and similar. I found that this specific service sends me strings in the Windows-1252 (CP1252) charset, with the non-ASCII bytes percent-encoded.

What is the best (and fastest) way, in JavaScript, to convert the whole string to UTF-8, or at least to decode the percent-encoded hex characters?

gariepy
  • 3,576
  • 6
  • 21
  • 34
Plastic
  • 9,874
  • 6
  • 32
  • 53
  • Can you post your code? You could use a `str.replace()` function. – Ciprian Oct 06 '15 at 15:39
  • So you are suggesting that I make a "cp1252" map and simply use str.replace? I don't have any code yet because I tried "htmlentities", "decodeURIComponent", etc. Do you know where I could find a comparison table to copy and use as a map? – Plastic Oct 06 '15 at 15:41
  • See my answer below. – Ciprian Oct 06 '15 at 15:51
  • you should not need to convert it yourself; sounds like an encoding is mis-labelled on the input side, going to the server... – dandavis Oct 06 '15 at 16:25

4 Answers4

1

Looks like you have percent encoded Windows-1252 characters.

You can replace them with replace, if you create a map from Windows-1252 codes to Unicode characters.

// Lookup table from the two-digit (lowercase) hex code of a Windows-1252 byte
// to the Unicode character it represents. Only the two entries needed by the
// example are listed here; the remaining entries are elided.
const win1252toUnicode = {
    // ...
    91: "‘",
    92: "’",
    // ...
};
const encoded = "stuff stuff %91quoted stuff%92 stuff";
// Replace every %XX escape with its mapped character. The hex digits are
// lowercased so "%9A" and "%9a" resolve to the same key, and escapes without
// an entry are left untouched rather than becoming the text "undefined".
const decoded = encoded.replace(/%([0-9a-f]{2})/gi, function (match, code) {
    const key = code.toLowerCase();
    return Object.prototype.hasOwnProperty.call(win1252toUnicode, key)
        ? win1252toUnicode[key]
        : match;
});

With this you get:

"stuff stuff ‘quoted stuff’ stuff"
gre_gor
  • 6,669
  • 9
  • 47
  • 52
1
/**
 * Decodes percent-encoded Windows-1252 bytes (`%XX`) embedded in a string.
 *
 * Bytes 0x00-0x7F and 0xA0-0xFF have the same value as their Unicode code
 * point, so they are converted directly. Bytes 0x80-0x9F are where
 * Windows-1252 differs from Latin-1 (curly quotes, euro sign, dashes, ...),
 * so they go through a lookup table; converting them directly would yield
 * invisible C1 control characters instead.
 *
 * @param {string} txt - Text containing `%XX` escapes (hex digits in any case).
 * @returns {string} The text with every `%XX` escape replaced by its character.
 *   A `%` that is not followed by two hex digits is left unchanged.
 */
function cp1252_to_utf8(txt) {
    // Unicode code points for Windows-1252 bytes 0x80..0x9F, in byte order.
    // The five bytes Windows-1252 leaves undefined (81, 8D, 8F, 90, 9D) map to
    // the C1 control with the same value, as the WHATWG Encoding Standard does.
    const c1CodePoints = [
        0x20ac, 0x0081, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
        0x02c6, 0x2030, 0x0160, 0x2039, 0x0152, 0x008d, 0x017d, 0x008f,
        0x0090, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
        0x02dc, 0x2122, 0x0161, 0x203a, 0x0153, 0x009d, 0x017e, 0x0178,
    ];
    return txt.replace(/%([0-9a-f]{2})/gi, (match, hex) => {
        const byte = parseInt(hex, 16);
        const codePoint = byte >= 0x80 && byte <= 0x9f
            ? c1CodePoints[byte - 0x80]
            : byte;
        return String.fromCharCode(codePoint);
    });
}
  • Your answer was flagged as low-quality because of its length and content. Please consider explaining what your code does. – Frank Fajardo Aug 06 '17 at 23:48
  • While answers are always appreciated, it really helps to provide some information about how your code solves the problem at hand. Not everyone may be familiar with your exact coding logic, but may understand your general *approach* or *concept*. To help improve your answer, please provide some [**context surrounding your answer**](https://meta.stackexchange.com/questions/114762), and see the help article on [**writing great answers**](http://stackoverflow.com/help/how-to-answer) for some tips on how to make your answers count :) – Obsidian Age Aug 06 '17 at 23:58
0

Here is a character map:

http://www.obkb.com/dcljr/charstxt.html

Use this code to replace characters in your code:

const str = "stuff stuff %91quoted stuff%92 stuff";
// A string pattern passed to replace() only replaces the first occurrence, so
// use regular expressions with the "g" flag to replace every occurrence.
// Chain one replace() per character that needs to be converted.
const res = str
    .replace(/%91/g, "‘")
    .replace(/%92/g, "’");

Also, check this question for multiple replacement:

Replace multiple strings at once

Community
  • 1
  • 1
Ciprian
  • 872
  • 1
  • 10
  • 30
0

I decided to answer this myself because there isn't a complete answer yet and in the meantime I figured it out. Thanks to @Ciprian for the link to the character map and thanks to @gre_gor for the suggestion of the replacing function.

I was not able to find a complete map to copy, so I had to merge several that I found on the internet to make my own. I also wrote a function to do the conversion, and I would like to share it in case someone else needs it.

/**
 * Converts percent-encoded Windows-1252 (CP1252) bytes in a string, written as
 * `%XX`, into the Unicode characters they stand for.
 *
 * Only bytes 0x80-0x9F need an explicit map: every other Windows-1252 byte has
 * the same numeric value as its Unicode code point (ASCII and Latin-1 ranges),
 * so it is converted directly from its hex value. This also covers escapes
 * such as `%20` (space), `%a0` (no-break space) and `%ad` (soft hyphen).
 *
 * @param {string} txt - Text containing `%XX` escapes; hex digits may be upper
 *   or lower case.
 * @returns {string} The text with every `%XX` escape decoded. A `%` that is not
 *   followed by two hex digits is left as is.
 */
function cp1252_to_utf8(txt) {
    // Windows-1252 bytes whose character differs from the code point with the
    // same numeric value. Keys are lowercase hex. The bytes Windows-1252 leaves
    // undefined (81, 8d, 8f, 90, 9d) are intentionally absent and fall through
    // to the direct conversion below.
    const chars_map = {
        "80": "\u20ac", // euro sign
        "82": "\u201a", // single low-9 quotation mark
        "83": "\u0192", // latin small letter f with hook
        "84": "\u201e", // double low-9 quotation mark
        "85": "\u2026", // horizontal ellipsis
        "86": "\u2020", // dagger
        "87": "\u2021", // double dagger
        "88": "\u02c6", // modifier letter circumflex accent
        "89": "\u2030", // per mille sign
        "8a": "\u0160", // latin capital letter S with caron
        "8b": "\u2039", // single left-pointing angle quotation mark
        "8c": "\u0152", // latin capital ligature OE
        "8e": "\u017d", // latin capital letter Z with caron
        "91": "\u2018", // left single quotation mark
        "92": "\u2019", // right single quotation mark
        "93": "\u201c", // left double quotation mark
        "94": "\u201d", // right double quotation mark
        "95": "\u2022", // bullet
        "96": "\u2013", // en dash
        "97": "\u2014", // em dash
        "98": "\u02dc", // small tilde
        "99": "\u2122", // trade mark sign
        "9a": "\u0161", // latin small letter s with caron
        "9b": "\u203a", // single right-pointing angle quotation mark
        "9c": "\u0153", // latin small ligature oe
        "9e": "\u017e", // latin small letter z with caron
        "9f": "\u0178"  // latin capital letter Y with diaeresis
    };
    return txt.replace(/%([0-9a-f]{2})/gi, function (match, code) {
        // The regex is case-insensitive, so normalize before the lookup.
        const key = code.toLowerCase();
        if (Object.prototype.hasOwnProperty.call(chars_map, key)) {
            return chars_map[key];
        }
        // Outside the table the byte value is already the Unicode code point.
        return String.fromCharCode(parseInt(key, 16));
    });
}

It will obviously be used like:

// cp1252_txt is the received string containing percent-encoded CP1252 bytes.
const utf8_txt = cp1252_to_utf8(cp1252_txt);

I hope it will be useful for someone.

Plastic
  • 9,874
  • 6
  • 32
  • 53