I would like to split this string into an array, keeping each Chinese character as its own element and each run of English letters together as one element, i.e. go from
String = "你is我"
to
array = ["你", "is", "我"]
How can I do that in javascript?
I would like to split this string into an array, keeping each Chinese character as its own element and each run of English letters together as one element, i.e. go from
String = "你is我"
to
array = ["你", "is", "我"]
How can I do that in javascript?
You can use a regular expression for this. The pattern below matches each run of Latin letters and digits; if you also want to match other characters, add them inside the brackets:
// Matches every run of one or more ASCII letters or digits.
const regex = /[a-zA-Z0-9]{1,}/gm;
const str = `你is我你is我你is我你is我你is我`;

// The pattern always consumes at least one character, so there are no
// zero-width matches to guard against; matchAll() simply walks every match.
for (const found of str.matchAll(regex)) {
  // Index 0 is the full match; later indexes would be capture groups
  // (this pattern has none).
  found.forEach((match, groupIndex) => {
    console.log(`Found match, group ${groupIndex}: ${match}`);
  });
}
Well, this took some effort, but here it is. The idea is to check the Unicode value of each character. Here I check whether it falls within the Basic Latin range (U+0020 to U+007F); you could check against a Chinese Unicode range instead.
// Sample input: two Chinese characters surrounding the Latin word "is".
var s = "你is我";
/**
 * Returns the UTF-16 code unit of the first character of `s` as an
 * uppercase hexadecimal string, left-padded with zeros to at least
 * four characters.
 *
 * @param {string} s - String whose first character is inspected.
 * @returns {string} Hex representation, e.g. "0041" for "A".
 */
function entityForSymbolInContainer(s) {
  return s.charCodeAt(0).toString(16).toUpperCase().padStart(4, "0");
}
/**
 * Reports whether the first character of `s` falls in the range
 * U+0020 through U+007F.
 *
 * @param {string} s - String whose first character is classified.
 * @returns {boolean} true when the character is inside the range.
 */
function is_latin_english(s) {
  // The helper yields zero-padded uppercase hex, so comparing the strings
  // orders them the same way as the numeric code units.
  const hex = entityForSymbolInContainer(s);
  return hex >= '0020' && hex <= '007F';
}
// Group the characters of `s` into runs that share the same
// is_latin_english() classification, e.g. "你is我" -> ["你", "is", "我"].
const s_split = s.split('');
const s_result = [];
for (let i = 0; i < s_split.length; i++) {
  const lastIdx = s_result.length - 1;
  // Extend the current run only when one exists and the classification is
  // unchanged; otherwise start a new run. Starting the first run here
  // (instead of pushing s_split[0] up front) keeps an empty input from
  // producing [undefined].
  if (lastIdx >= 0 && is_latin_english(s_result[lastIdx]) === is_latin_english(s_split[i])) {
    s_result[lastIdx] += s_split[i];
  } else {
    s_result.push(s_split[i]);
  }
}
console.log(s_result);
I used the method mentioned here to obtain the Unicode value of each character.
Range used for filtering Latin English characters - https://jrgraphix.net/r/Unicode/0020-007F
You can do this with the help of [spread syntax](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Operators/Spread_syntax) and a simple for loop.
const str = "你is我";
// Alternative sample with whitespace-separated English words:
//const str = "你is我test hello";
const words = [];
let pendingWord = "";

// Push the English word collected so far (if any) and start a fresh one.
const flushPendingWord = () => {
  if (pendingWord !== "") {
    words.push(pendingWord);
    pendingWord = "";
  }
};

// Iterating the string directly yields the same characters as spreading it.
for (const ch of str) {
  if (/^[a-zA-Z]+$/.test(ch)) {
    // English letters accumulate into the current word.
    pendingWord += ch;
    continue;
  }
  // Any other character ends the current English word.
  flushPendingWord();
  // Whitespace only separates words; every other character becomes its own entry.
  if (!/(\s)+$/.test(ch)) {
    words.push(ch);
  }
}
flushPendingWord();
console.log(words);