1

I am trying to support UTF-8 characters in my ANT script.

As long as the character strings are made up of 2-byte UTF-8 characters, such as:

  • Lògìñ
  • Ùsèr ÌÐ

Then things work fine.

When I use a Unicode Han character such as 我 (U+6211), things break.

According to this site: http://www.fileformat.info/info/unicode/char/6211/index.htm it has a UTF-8 encoding of 0xE6 0x88 0x91.

I can see in UltraEdit that my input properties file has the values "E6 88 91" all in a row, so I'm fairly confident that my input is correct. And when I open the same file in Notepad++, I can see all the characters correctly.

Here is my Build Script:

<?xml version="1.0" encoding="UTF-8" ?>

<!-- Test build: loads apps.properties.all.txt and echoes a few of its
     properties using the default and several explicit output encodings. -->
<project
    name="utf8test"
    default="all"
    basedir=".">

    <target name="all">
        <!-- Read the properties file, declaring its encoding as UTF-8. -->
        <loadproperties  encoding="UTF-8" srcfile="./apps.properties.all.txt"  />

        <!-- common.app.name holds the Han characters; echo it with the default
             encoding and with three explicit encodings. The two echoes after
             that print Latin-1 range values with the default encoding. -->
        <echo>No encoding ${common.app.name}</echo>
        <echo encoding="UTF-8">UTF-8 ${common.app.name}</echo>
        <echo encoding="UnicodeLittle">UnicodeLittle ${common.app.name}</echo>
        <echo encoding="UnicodeLittleUnmarked">UnicodeLittleUnmarked ${common.app.name}</echo>
        <echo>${common.app.ServerName}</echo>
        <echo>${bb.vendor}</echo>

        <!-- Same four encoding variants for a value made only of accented
             Latin characters, for comparison. -->
        <echo>No encoding ${common.app.UserIdText}</echo>
        <echo encoding="UTF-8">UTF-8 ${common.app.UserIdText}</echo>
        <echo encoding="UnicodeLittle">UnicodeLittle ${common.app.UserIdText}</echo>
        <echo encoding="UnicodeLittleUnmarked">UnicodeLittleUnmarked ${common.app.UserIdText}</echo>

        <!-- Dump every property defined in the project, including the loaded ones. -->
        <echoproperties />
      </target>
    </project>

And here is my properties file:

common.app=VrvPsLTst
common.app.name=我們
common.app.description=Pseudo Loc Test App for Build Script testing
common.app.ServerName=http://Vèrìvò.com
bb.vendor=Vèrìvò
common.app.PasswordText=Pàsswòrð
bb.override.list=MP_COPYRIGHTTEXT, "Çòpÿrìght 2012 Vèrívó Bùîlð TéàM"
common.app.LoginButtonText=Lògìñ
common.app.UserIdText=Ùsèr ÌÐ
bb.SMSSuccess=Mèssàgéß Sùççêssfúllÿ Sëñt
common.app.LoginScreenMessage=WèlçòMé Mêssàgë
common.app.LoginProgressMessage=Àùthèñtìçàtíòñ îñ prógréss...
ios.RegistrationText=Règìstràtíòñ Téxt
ios.RegistrationURL=http://www.josscrowcroft.com/2011/code/utf-8-multibyte-characters-in-url-parameters-%E2%9C%93/

Here is what the output looks like:

Buildfile: C:\Temp\utf8\build.xml

all:
     [echo] No encoding ??
     [echo] UTF-8 ??
     [echo] ÿþU n i c o d e L i t t l e   ? ? 
     [echo] U n i c o d e L i t t l e U n m a r k e d   ? ? 
     [echo] http://Vèrìvò.com
     [echo] Vèrìvò
     [echo] No encoding Ùsèr ÌÐ
     [echo] UTF-8 Ùsèr Ì�
     [echo] ÿþU n i c o d e L i t t l e   Ù s è r   Ì Ð 
     [echo] U n i c o d e L i t t l e U n m a r k e d   Ù s è r   Ì Ð 
[echoproperties] #Ant properties
[echoproperties] #Mon Jun 18 15:25:13 EDT 2012
[echoproperties] ant.core.lib=C\:\\ant\\lib\\ant.jar
[echoproperties] ant.file=C\:\\Temp\\utf8\\build.xml
[echoproperties] ant.file.type=file
[echoproperties] ant.file.type.utf8test=file
[echoproperties] ant.file.utf8test=C\:\\Temp\\utf8\\build.xml
[echoproperties] ant.home=c\:\\ant\\bin\\..
[echoproperties] ant.java.version=1.6
[echoproperties] ant.library.dir=C\:\\ant\\lib
[echoproperties] ant.project.default-target=all
[echoproperties] ant.project.invoked-targets=all
[echoproperties] ant.project.name=utf8test
[echoproperties] ant.version=Apache Ant version 1.8.1 compiled on April 30 2010
[echoproperties] awt.toolkit=sun.awt.windows.WToolkit
[echoproperties] basedir=C\:\\Temp\\utf8
[echoproperties] bb.SMSSuccess=M\u00E8ss\u00E0g\u00E9\u00DF S\u00F9\u00E7\u00E7\u00EAssf\u00FAll\u00FF S\u00EB\u00F1t
[echoproperties] bb.override.list=MP_COPYRIGHTTEXT, "\u00C7\u00F2p\u00FFr\u00ECght 2012 V\u00E8r\u00EDv\u00F3 B\u00F9\u00EEl\u00F0 T\u00E9\u00E0?"
[echoproperties] bb.vendor=V\u00E8r\u00ECv\u00F2
[echoproperties] common.app=VrvPsLTst
[echoproperties] common.app.LoginButtonText=L\u00F2g\u00EC\u00F1
[echoproperties] common.app.LoginProgressMessage=\u00C0\u00F9th\u00E8\u00F1t\u00EC\u00E7\u00E0t\u00ED\u00F2\u00F1 \u00EE\u00F1 pr\u00F3gr\u00E9ss...
[echoproperties] common.app.LoginScreenMessage=W\u00E8l\u00E7\u00F2?\u00E9 M\u00EAss\u00E0g\u00EB
[echoproperties] common.app.PasswordText=P\u00E0ssw\u00F2r\u00F0
[echoproperties] common.app.ServerName=http\://V\u00E8r\u00ECv\u00F2.com
[echoproperties] common.app.UserIdText=\u00D9s\u00E8r \u00CC\u00D0
[echoproperties] common.app.description=Pseudo Loc Test App for Build Script testing
[echoproperties] common.app.name=??
[echoproperties] file.encoding=Cp1252
[echoproperties] file.encoding.pkg=sun.io
[echoproperties] file.separator=\\
[echoproperties] ios.RegistrationText=R\u00E8g\u00ECstr\u00E0t\u00ED\u00F2\u00F1 T\u00E9xt
[echoproperties] ios.RegistrationURL=http\://www.josscrowcroft.com/2011/code/utf-8-multibyte-characters-in-url-parameters-%E2%9C%93/
[echoproperties] java.awt.graphicsenv=sun.awt.Win32GraphicsEnvironment
[echoproperties] java.awt.printerjob=sun.awt.windows.WPrinterJob
[echoproperties] java.class.path=c\:\\ant\\bin\\..\\lib\\ant-launcher.jar;C\:\\Temp\\utf8\\.\\;C\:\\Program Files (x86)\\Java\\jre7\\lib\\ext\\QTJava.zip;C\:\\ant\\lib\\ant-antlr.jar;C\:\\ant\\lib\\ant-apache-bcel.jar;C\:\\ant\\lib\\ant-apache-bsf.jar;C\:\\ant\\lib\\ant-apache-log4j.jar;C\:\\ant\\lib\\ant-apache-oro.jar;C\:\\ant\\lib\\ant-apache-regexp.jar;C\:\\ant\\lib\\ant-apache-resolver.jar;C\:\\ant\\lib\\ant-apache-xalan2.jar;C\:\\ant\\lib\\ant-commons-logging.jar;C\:\\ant\\lib\\ant-commons-net.jar;C\:\\ant\\lib\\ant-contrib-1.0b3.jar;C\:\\ant\\lib\\ant-jai.jar;C\:\\ant\\lib\\ant-javamail.jar;C\:\\ant\\lib\\ant-jdepend.jar;C\:\\ant\\lib\\ant-jmf.jar;C\:\\ant\\lib\\ant-jsch.jar;C\:\\ant\\lib\\ant-junit.jar;C\:\\ant\\lib\\ant-launcher.jar;C\:\\ant\\lib\\ant-netrexx.jar;C\:\\ant\\lib\\ant-nodeps.jar;C\:\\ant\\lib\\ant-starteam.jar;C\:\\ant\\lib\\ant-stylebook.jar;C\:\\ant\\lib\\ant-swing.jar;C\:\\ant\\lib\\ant-testutil.jar;C\:\\ant\\lib\\ant-trax.jar;C\:\\ant\\lib\\ant-weblogic.jar;C\:\\ant\\lib\\ant.jar;C\:\\ant\\lib\\bb-ant-tools.jar;C\:\\ant\\lib\\xercesImpl.jar;C\:\\ant\\lib\\xml-apis.jar;C\:\\Program Files\\Java\\jre7\\lib\\tools.jar
[echoproperties] java.class.version=51.0
[echoproperties] java.endorsed.dirs=C\:\\Program Files\\Java\\jre7\\lib\\endorsed
[echoproperties] java.ext.dirs=C\:\\Program Files\\Java\\jre7\\lib\\ext;C\:\\Windows\\Sun\\Java\\lib\\ext
[echoproperties] java.home=C\:\\Program Files\\Java\\jre7
[echoproperties] java.io.tmpdir=C\:\\Users\\efelton\\AppData\\Local\\Temp\\
[echoproperties] java.library.path=C\:\\Windows\\SYSTEM32;C\:\\Windows\\Sun\\Java\\bin;C\:\\Windows\\system32;C\:\\Windows;C\:\\Windows\\SYSTEM32;C\:\\Windows;C\:\\Windows\\SYSTEM32\\WBEM;C\:\\Windows\\SYSTEM32\\WINDOWSPOWERSHELL\\V1.0\\;C\:\\PROGRAM FILES\\INTEL\\WIFI\\BIN\\;C\:\\PROGRAM FILES\\COMMON FILES\\INTEL\\WIRELESSCOMMON\\;C\:\\PROGRAM FILES (X86)\\MICROSOFT SQL SERVER\\100\\TOOLS\\BINN\\;C\:\\PROGRAM FILES\\MICROSOFT SQL SERVER\\100\\TOOLS\\BINN\\;C\:\\PROGRAM FILES\\MICROSOFT SQL SERVER\\100\\DTS\\BINN\\;C\:\\PROGRAM FILES (X86)\\MICROSOFT SQL SERVER\\100\\TOOLS\\BINN\\VSSHELL\\COMMON7\\IDE\\;C\:\\PROGRAM FILES (X86)\\MICROSOFT SQL SERVER\\100\\DTS\\BINN\\;C\:\\Program Files\\ThinkPad\\Bluetooth Software\\;C\:\\Program Files\\ThinkPad\\Bluetooth Software\\syswow64;C\:\\Program Files (x86)\\QuickTime\\QTSystem\\;C\:\\Program Files (x86)\\AccuRev\\bin;C\:\\Program Files\\Java\\jdk1.7.0_04\\bin;C\:\\Program Files (x86)\\IDM Computer Solutions\\UltraEdit\\;.
[echoproperties] java.runtime.name=Java(TM) SE Runtime Environment
[echoproperties] java.runtime.version=1.7.0_04-b22
[echoproperties] java.specification.name=Java Platform API Specification
[echoproperties] java.specification.vendor=Oracle Corporation
[echoproperties] java.specification.version=1.7
[echoproperties] java.vendor=Oracle Corporation
[echoproperties] java.vendor.url=http\://java.oracle.com/
[echoproperties] java.vendor.url.bug=http\://bugreport.sun.com/bugreport/
[echoproperties] java.version=1.7.0_04
[echoproperties] java.vm.info=mixed mode
[echoproperties] java.vm.name=Java HotSpot(TM) 64-Bit Server VM
[echoproperties] java.vm.specification.name=Java Virtual Machine Specification
[echoproperties] java.vm.specification.vendor=Oracle Corporation
[echoproperties] java.vm.specification.version=1.7
[echoproperties] java.vm.vendor=Oracle Corporation
[echoproperties] java.vm.version=23.0-b21
[echoproperties] line.separator=\r\n
[echoproperties] os.arch=amd64
[echoproperties] os.name=Windows 7
[echoproperties] os.version=6.1
[echoproperties] path.separator=;
[echoproperties] sun.arch.data.model=64
[echoproperties] sun.boot.class.path=C\:\\Program Files\\Java\\jre7\\lib\\resources.jar;C\:\\Program Files\\Java\\jre7\\lib\\rt.jar;C\:\\Program Files\\Java\\jre7\\lib\\sunrsasign.jar;C\:\\Program Files\\Java\\jre7\\lib\\jsse.jar;C\:\\Program Files\\Java\\jre7\\lib\\jce.jar;C\:\\Program Files\\Java\\jre7\\lib\\charsets.jar;C\:\\Program Files\\Java\\jre7\\lib\\jfr.jar;C\:\\Program Files\\Java\\jre7\\classes
[echoproperties] sun.boot.library.path=C\:\\Program Files\\Java\\jre7\\bin
[echoproperties] sun.cpu.endian=little
[echoproperties] sun.cpu.isalist=amd64
[echoproperties] sun.desktop=windows
[echoproperties] sun.io.unicode.encoding=UnicodeLittle
[echoproperties] sun.java.command=org.apache.tools.ant.launch.Launcher -cp .;C\:\\Program Files (x86)\\Java\\jre7\\lib\\ext\\QTJava.zip
[echoproperties] sun.java.launcher=SUN_STANDARD
[echoproperties] sun.jnu.encoding=Cp1252
[echoproperties] sun.management.compiler=HotSpot 64-Bit Tiered Compilers
[echoproperties] sun.os.patch.level=Service Pack 1
[echoproperties] user.country=US
[echoproperties] user.dir=C\:\\Temp\\utf8
[echoproperties] user.home=C\:\\Users\\efelton
[echoproperties] user.language=en
[echoproperties] user.name=efelton
[echoproperties] user.script=
[echoproperties] user.timezone=
[echoproperties] user.variant=

BUILD SUCCESSFUL
Total time: 1 second

Thank you for your help

EDIT\UPDATE 6/19/2012

I am developing in a Windows environment.

I have installed a TTF from: http://freedesktop.org/wiki/Software/CJKUnifonts/Download

I have updated UltraEdit to use the TTF and I can see the Chinese characters.

<?xml version="1.0" encoding="UTF-8" ?>

<!-- Reduced test: no properties file is loaded; the Han characters are written
     literally in the build file and echoed to the console and to a file. -->
<project name="utf8test" default="all" basedir="."> 

   <target name="all">        

      <!-- Console output: default encoding, then two explicit encodings. -->
      <echo>我們</echo>
      <echo encoding="ISO-8859-1">ISO-8859-1 我們</echo> 
      <echo encoding="UTF-8">UTF-8 我們</echo> 


      <!-- File output: each echo appends one line to echo_output.txt, each
           with a different encoding attribute (the first uses the default). -->
      <echo file="echo_output.txt" append="true" >我們 ${line.separator}</echo>
      <echo file="echo_output.txt" append="true"  encoding="ISO-8859-1">ISO-8859-1 我們 ${line.separator}</echo> 
      <echo file="echo_output.txt" append="true"  encoding="UTF-8">UTF-8 我們 ${line.separator}</echo> 
      <echo file="echo_output.txt" append="true"  encoding="UnicodeLittle">UnicodeLittle 我們 ${line.separator}</echo> 
      <echo file="echo_output.txt" append="true"  encoding="UnicodeLittleUnmarked">UnicodeLittleUnmarked 我們 ${line.separator}</echo> 

   </target> 
</project> 

The output captured by running inside UltraEdit is:

    Buildfile: E:\temp\utf8\build.xml

    all:
         [echo] ??
         [echo] ISO-8859-1 ??
         [echo] UTF-8 ??

    BUILD SUCCESSFUL
    Total time: 1 second

And the echo_output.txt file shows up like this:

    ?? 
    ISO-8859-1 ?? 
    UTF-8 ?? 
    ÿþU n i c o d e L i t t l e   ? ?   

     U n i c o d e L i t t l e U n m a r k e d   ? ?   

So there appears to be something fundamentally wrong with how my ANT environment is set up, since I cannot simply echo the character to the screen or to a file.

M A
  • 71,713
  • 13
  • 134
  • 174
efelton
  • 181
  • 3
  • 9

2 Answers

0

The java.util.Properties class uses the ISO 8859-1 encoding. The following worked when tested with Ant 1.8.2.

build.xml

<?xml version="1.0" encoding="UTF-8" ?>
<!-- Variant of the question's build: the properties file is loaded and the
     value echoed with ISO-8859-1 declared as the encoding instead of UTF-8. -->
<project name="utf8test" default="all" basedir=".">

<target name="all">
  <!-- Load the same properties file, declaring its encoding as ISO-8859-1. -->
  <loadproperties encoding="ISO-8859-1" srcfile="./apps.properties.all.txt"  />

  <!-- Echo the Han-character property with the default encoding, then with
       ISO-8859-1 as the echo encoding. -->
  <echo>No encoding ${common.app.name}</echo>
  <echo encoding="ISO-8859-1">ISO-8859-1 ${common.app.name}</echo>
</target>
</project>

Output

all:
     [echo] No encoding æå
     [echo] ISO-8859-1 我們

BUILD SUCCESSFUL
Christopher Peisert
  • 21,862
  • 3
  • 86
  • 117
  • 1
    I downloaded ant 1.8.4, and ran your script and I got `[echo] ISO-8859-1 æ??å??` as output. Could my JAVA version be affecting my results? – efelton Jun 18 '12 at 20:56
  • The issue you are seeing is related to the Windows command line. See [What encoding/code page is cmd.exe using](http://stackoverflow.com/questions/1259084/what-encoding-code-page-is-cmd-exe-using) and [How to Enable More Fonts for Windows Command Prompt](http://www.mydigitallife.info/how-to-enable-more-fonts-for-windows-command-prompt/) – Christopher Peisert Jun 19 '12 at 03:55
  • 1
    @efelton With Linux, the above solution works "out of the box". Testing on Windows 7, I found that even after setting the code page to UTF-8 using `chcp 65001`, the console still did not display Han characters correctly using font `Lucida Console`. The easiest solution I found on Windows was to run the Ant build inside Intellij IDEA and echo to a file. – Christopher Peisert Jun 19 '12 at 05:22
0

I solved my problem on Windows and MacOS by passing all input (properties files) through this encoder first. ANT can then correctly read, and then write, the values when the input is escaped in this manner.

 // Re-encode a UTF-8 text file (input) as a pure-ASCII file (dest): every
 // UTF-16 code unit above 127 is replaced by a \\uXXXX escape, ASCII characters
 // are copied through unchanged, and NUL characters are dropped. Any
 // IOException is rethrown as a BuildException carrying the original cause.
 StringBuffer buffer = new StringBuffer();
 try
 {
     FileInputStream fis = new FileInputStream(input);
     try
     {
         Reader in = new BufferedReader(new InputStreamReader(fis, "UTF8"));
         int ch;
         // read() yields one UTF-16 code unit at a time, so a character
         // outside the BMP is emitted as two consecutive \\uXXXX escapes.
         while ((ch = in.read()) > -1)
         {
             if (ch > 127)
             {
                 String hex = Integer.toHexString(ch);
                 buffer.append("\\u");
                 // Left-pad with zeros so the escape always has four hex digits.
                 for (int pad = hex.length(); pad < 4; pad++)
                 {
                     buffer.append('0');
                 }
                 buffer.append(hex);
             }
             else if (ch != 0)
             {
                 buffer.append((char) ch);
             }
         }
     }
     finally
     {
         // Closing the underlying stream releases the file handle even when
         // decoding fails part-way through.
         fis.close();
     }
 }
 catch (IOException e)
 {
     throw new BuildException(e.getMessage(), e);
 }
 try
 {
     FileOutputStream fos = new FileOutputStream(dest);
     try
     {
         // The buffer holds only ASCII at this point, so the charset choice
         // does not alter the bytes written; it is kept as in the original.
         Writer out = new OutputStreamWriter(fos, "windows-1252");
         out.write(buffer.toString());
         // Push the encoder's pending bytes to fos before it is closed below.
         out.flush();
     }
     finally
     {
         fos.close();
     }
 }
 catch (IOException e)
 {
     throw new BuildException(e.getMessage(), e);
 }
efelton
  • 181
  • 3
  • 9