According to my research and googling, Javascript seems to lack support for locale aware sorting and string comparisons. There is localeCompare()
, but it has been reported of browser specific differencies and impossibility to explicitly set which locale is used (the OS locale is not always the one wanted). There is some intentions to add collation support inside ECMAScript, but before it, we are on our own. And depending how consistent the results are across browsers, may be we are on our own forever :(.
I have the following code, which makes alphabetical sort of an array. It's made speed in mind, and the ideas are got from https://stackoverflow.com/a/11598969/1691517, to which I made some speed improvements.
In this example, the words array has 13 members and the sort-function is called 34 times. I want to replace some of the letters in the words-array (you don't have to know what replacements are made, because it's not the point in this question). If I make these replacements in sort-function ( the one that starts with return function(a, b)
), the code is inefficient, because replacements are made more than once per array member. Of course I can make these replacements outside of this closure, I mean before the line words.sort(sortbyalphabet_timo);
, but it's not what I want.
Question 1: Is it possible to modify the words-array in between the lines "PREPARATION STARTS" and "PREPARATION ENDS" so that the sort function uses modified words-array?
Question 2: Is it possible to input arguments to the closure so that code between PREPARATION STARTS and PREPARATION ENDS can use them? I have tried this without success:
var caseinsensitive = true;
words.sort( sortbyalphabet_timo(caseinsensitive) );
And here is finally the code example, and the ready to run example is in http://jsfiddle.net/3E7wb/:
var sortbyalphabet_timo = (function() {
// PREPARATION STARTS
var i, alphabet = "-0123456789AaÀàÁáÂâÃãÄäBbCcÇçDdEeÈèÉéÊêËëFfGgHhIiÌìÍíÎîÏïJjKkLlMmNnÑñOoÒòÓóÔôÕõÖöPpQqRrSsTtUuÙùÚúÛûÜüVvWwXxYyÝýŸÿZz",
index = {};
i = alphabet.length;
while (i--) index[alphabet.charCodeAt(i)] = i;
// PREPARATION ENDS
return function(a, b) {
var i, len, diff;
if (typeof a === "string" && typeof b === "string") {
(a.length > b.length) ? len = a.length : len = b.length;
for (i = 0; i < len; i++) {
diff = index[a.charCodeAt(i)] - index[b.charCodeAt(i)];
if (diff !== 0) {
return diff;
}
}
// sort the shorter first
return a.length - b.length;
} else {
return 0;
}
};
})();
var words = ['tauschen', '66', '55', '33', 'täuschen', 'andern', 'ändern', 'Ast', 'Äste', 'dosen', 'dösen', 'Donaudam-0', 'Donaudam-1'];
$('#orig').html(words.toString());
words.sort(sortbyalphabet_timo);
$('#sorted').html(words.toString());`