`StringEscapeUtils.escapeXml()` in `org.apache.commons.lang` (Apache Commons Lang 2.x) always escapes non-ASCII characters, in addition to the XML special characters.
If you don't want Japanese (or any other non-ASCII) characters to be escaped, pass only the ASCII parts of the string to `StringEscapeUtils.escapeXml()` and copy everything else through unchanged, like this:
package org.example;
import java.util.Arrays;
import org.apache.commons.lang.StringEscapeUtils;
/**
 * Demonstrates XML-escaping a string while leaving non-ASCII characters
 * (for example Japanese text) exactly as they are.
 *
 * <p>{@code StringEscapeUtils.escapeXml} from {@code org.apache.commons.lang}
 * is only ever handed ASCII text here, so it never gets the chance to rewrite
 * non-ASCII characters.
 */
public class Test {

    /** Highest character matched by the regex class {@code \p{ASCII}} (0x00-0x7F). */
    private static final char MAX_ASCII = 0x7F;

    public static void main(String[] args) {
        // Library call on the whole string: the Japanese characters are escaped too, e.g.
        // "&#35328;&#35486;&#12364;&#33391;&#12367;&#12394;&#12356; &lt;ABC&gt;"
        System.out.println(StringEscapeUtils.escapeXml("言語が良くない <ABC>"));
        // ASCII-only escaping: you will get "言語が良くない &lt;ABC&gt;"
        System.out.println(escapeXml("言語が良くない <ABC>"));
    }

    /**
     * XML-escapes the ASCII portions of {@code str} and copies every non-ASCII
     * character through unchanged.
     *
     * @param str the text to escape; may be {@code null}
     * @return the escaped text, or {@code null} if {@code str} is {@code null}
     */
    public static String escapeXml(String str) {
        if (str == null) {
            return null;
        }
        final int length = str.length();
        final StringBuilder escaped = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final int runStart = pos;
            final boolean asciiRun = isAscii(str.charAt(pos));
            // Advance to the end of the current run of same-kind characters so the
            // library is called once per ASCII run instead of once per character.
            while (pos < length && isAscii(str.charAt(pos)) == asciiRun) {
                pos++;
            }
            if (asciiRun) {
                escaped.append(StringEscapeUtils.escapeXml(str.substring(runStart, pos)));
            } else {
                escaped.append(str, runStart, pos);
            }
        }
        return escaped.toString();
    }

    /**
     * XML-escapes {@code str} only when it consists of exactly one ASCII
     * character; any other input is returned unchanged.
     *
     * @param str a string, normally holding a single character; may be {@code null}
     * @return the escaped form of a single ASCII character, otherwise {@code str} itself
     */
    public static String escapeCharacter(String str) {
        if (str != null && str.length() == 1 && isAscii(str.charAt(0))) {
            return StringEscapeUtils.escapeXml(str);
        }
        return str;
    }

    /** Returns whether {@code c} lies in the ASCII range 0x00-0x7F. */
    private static boolean isAscii(char c) {
        return c <= MAX_ASCII;
    }
}