I am testing the feasibility of compressing some messaging between Java and C#.
The messages range from small strings (40 bytes) to larger strings (4 KB).
I have found differences between the output of the Java GZIP implementation and that of the dot Net GZIP implementation.
I'm guessing that dot Net has a larger header that is causing the large overhead.
I prefer the Java implementation, as it works better on small strings, and I would like the dot Net implementation to achieve similar results.
Output, Java version 1.6.0_10
Text:EncodeDecode
Bytes:(12 bytes)RW5jb2RlRGVjb2Rl <- Base64
Compressed:(29)H4sIAAAAAAAAAHPNS85PSXVJBZEAd9jYdgwAAAA=
Decompressed:(12)RW5jb2RlRGVjb2Rl
Converted:EncodeDecode
Text:EncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecode
Bytes:(120)RW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2Rl
Compressed:(33)H4sIAAAAAAAAAHPNS85PSXVJBZGudGQDAOcKnrd4AAAA
Decompressed:(120)RW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2Rl
Converted:EncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecode
Output, dot Net 2.0.50727
Text:EncodeDecode
Bytes:(12)RW5jb2RlRGVjb2Rl
Compressed:(128)H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcplVmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ik6X02qWP83x7/8Dd9jYdgwAAAA=
Decompressed:(12)RW5jb2RlRGVjb2Rl
Text:EncodeDecode
Text:EncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecode
Bytes:(120)RW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2Rl
Compressed:(131)H4sIAAAAAAAEAO29B2AcSZYlJi9tynt/SvVK1+B0oQiAYBMk2JBAEOzBiM3mkuwdaUcjKasqgcplVmVdZhZAzO2dvPfee++999577733ujudTif33/8/XGZkAWz2zkrayZ4hgKrIHz9+fB8/Ik6X02qWP83x7w/z9/8H5wqet3gAAAA=
Decompressed:(120)RW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2RlRW5jb2RlRGVjb2Rl
Text:EncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecodeEncodeDecode
How can I achieve the smaller encoded size on the dot Net side?
Note:
The Java implementation can decode the dot Net output, and
the dot Net implementation can decode the Java output.
Java Code
/**
 * Round-trips the short sample string through {@code encode_GZIP} and
 * {@code decode_GZIP}, printing every intermediate stage via {@code debug}.
 * Any exception raised along the way is handed to {@code fail}.
 */
@Test
public void testEncodeDecode()
{
    final String text = "EncodeDecode";
    try
    {
        debug( "Text:" + text );

        // Raw UTF-8 bytes of the sample text.
        final byte[] plain = text.getBytes( "UTF-8" );
        debug( "Bytes:" + toString( plain ) );

        // Compressed form.
        final byte[] packed = encode_GZIP( plain );
        debug( "Encoded:" + toString( packed ) );

        // Decompressed form.
        final byte[] unpacked = decode_GZIP( packed );
        debug( "Decoded:" + toString( unpacked ) );

        // Back to text.
        final String roundTripped = new String( unpacked, "UTF-8" );
        debug( "Converted:" + roundTripped );
    }
    catch( Exception ex )
    {
        fail( ex );
    }
}
/**
 * Round-trips a longer, highly repetitive string (the sample word repeated
 * ten times) through {@code encode_GZIP} and {@code decode_GZIP}, printing
 * every intermediate stage via {@code debug}. Text/byte conversions use the
 * {@code ENCODING} constant. Any exception is handed to {@code fail}.
 */
@Test
public void testEncodeDecode2()
{
    final String word = "EncodeDecode";
    try
    {
        // Build the repeated input text.
        final StringBuilder builder = new StringBuilder();
        int remaining = 10;
        while( remaining > 0 )
        {
            builder.append( word );
            remaining--;
        }
        final String text = builder.toString();
        debug( "Text:" + text );

        final byte[] plain = text.getBytes( ENCODING );
        debug( "Bytes:" + toString( plain ) );

        final byte[] packed = encode_GZIP( plain );
        debug( "Encoded:" + toString( packed ) );

        final byte[] unpacked = decode_GZIP( packed );
        debug( "Decoded:" + toString( unpacked ) );

        final String roundTripped = new String( unpacked, ENCODING );
        debug( "Converted:" + roundTripped );
    }
    catch( Exception ex )
    {
        fail( ex );
    }
}
/**
 * Renders a byte array as {@code (<length>)<base64>}: the array length in
 * parentheses followed by the result of {@code Base64.byteArrayToBase64}.
 *
 * @param ba the bytes to render
 * @return the length-prefixed Base64 text
 */
private String toString( byte[] ba )
{
    final StringBuilder text = new StringBuilder();
    text.append( '(' ).append( ba.length ).append( ')' );
    text.append( Base64.byteArrayToBase64( ba ) );
    return text.toString();
}
/**
 * Compresses a byte array into GZIP format.
 *
 * <p>The whole array is handed to the compressor in one call; there is no
 * need to copy it through an intermediate input stream or to flush between
 * chunks, because the destination is an in-memory buffer.</p>
 *
 * @param baData the bytes to compress; must not be {@code null}
 * @return a complete GZIP stream (header, deflated data, trailer)
 * @throws IOException if the compressor fails while writing, finishing or
 *         closing; such failures are propagated rather than swallowed so a
 *         truncated stream is never returned
 */
protected static byte[] encode_GZIP( byte[] baData ) throws IOException
{
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPOutputStream zos = new GZIPOutputStream( baos );
    try
    {
        zos.write( baData, 0, baData.length );
        // finish() writes the remaining deflate data and the GZIP trailer.
        zos.finish();
    }
    finally
    {
        // Always release the compressor, even when a write fails.
        zos.close();
    }
    return baos.toByteArray();
}
/**
 * Decompresses a GZIP-formatted byte array.
 *
 * @param baData a complete GZIP stream; must not be {@code null}
 * @return the decompressed bytes
 * @throws IOException if the data is not in GZIP format, is corrupt, or is
 *         truncated
 */
protected static byte[] decode_GZIP( byte[] baData ) throws IOException
{
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    GZIPInputStream zis = new GZIPInputStream( new ByteArrayInputStream( baData ) );
    try
    {
        byte[] baBuf = new byte[ 1024 ];
        int nSize;
        while( -1 != ( nSize = zis.read( baBuf ) ) )
        {
            baos.write( baBuf, 0, nSize );
        }
    }
    finally
    {
        // Always release the decompressor, even when the input is corrupt.
        zis.close();
    }
    return baos.toByteArray();
}
/**
 * Writes the string form of the given value, followed by a line break, to
 * standard output.
 *
 * @param o the value to print; {@code null} prints as "null"
 */
private void debug( Object o )
{
    final String line = String.valueOf( o );
    System.out.println( line );
}
/**
 * Fails the current test because of an unexpected exception.
 *
 * <p>The stack trace is still printed as before. The failure is raised as an
 * {@link AssertionError} whose message is the exception's string form (so a
 * null exception message still yields a useful description) and whose cause
 * is the original exception, so the underlying stack trace is not lost.</p>
 *
 * @param ex the unexpected exception; must not be {@code null}
 * @throws AssertionError always
 */
private void fail( Exception ex )
{
    ex.printStackTrace();
    AssertionError failure = new AssertionError( ex.toString() );
    // Chain the original exception; initCause keeps this Java 6 compatible.
    failure.initCause( ex );
    throw failure;
}
dot Net Code
/// <summary>
/// Round-trips the short sample string through encode_GZIP and decode_GZIP,
/// writing every intermediate stage to the console.
/// </summary>
[Test]
public void TestJava6()
{
    string text = "EncodeDecode";
    Console.WriteLine("Text:" + text);

    // Raw UTF-8 bytes of the sample text.
    byte[] plain = Encoding.UTF8.GetBytes(text);
    Console.WriteLine("Bytes:" + toString(plain));

    // Compressed form.
    byte[] packed = encode_GZIP(plain);
    Console.WriteLine("Encoded:" + toString(packed));

    // Decompressed form.
    byte[] unpacked = decode_GZIP(packed);
    Console.WriteLine("Decoded:" + toString(unpacked));

    // Back to text.
    string roundTripped = Encoding.UTF8.GetString(unpacked);
    Console.WriteLine("Text:" + roundTripped);
}
/// <summary>
/// Round-trips a longer, highly repetitive string (the sample word repeated
/// ten times) through encode_GZIP and decode_GZIP, writing every
/// intermediate stage to the console.
/// </summary>
[Test]
public void TestJava7()
{
    string word = "EncodeDecode";

    // Build the repeated input text.
    StringBuilder builder = new StringBuilder();
    int remaining = 10;
    while (remaining > 0)
    {
        builder.Append(word);
        remaining--;
    }
    string text = builder.ToString();
    Console.WriteLine("Text:" + text);

    byte[] plain = Encoding.UTF8.GetBytes(text);
    Console.WriteLine("Bytes:" + toString(plain));

    byte[] packed = encode_GZIP(plain);
    Console.WriteLine("Encoded:" + toString(packed));

    byte[] unpacked = decode_GZIP(packed);
    Console.WriteLine("Decoded:" + toString(unpacked));

    string roundTripped = Encoding.UTF8.GetString(unpacked);
    Console.WriteLine("Text:" + roundTripped);
}
/// <summary>
/// Renders a byte array as "(length)base64": the array length in
/// parentheses followed by its Base64 encoding.
/// </summary>
/// <param name="ba">The bytes to render.</param>
/// <returns>The length-prefixed Base64 text.</returns>
public string toString(byte[] ba)
{
    int count = ba.Length;
    string payload = Convert.ToBase64String(ba);
    return "(" + count + ")" + payload;
}
/// <summary>
/// Decompresses a GZIP-formatted byte array by copying a decompressing
/// GZipStream over the input into an in-memory buffer via
/// Utilities.CopyStream.
/// </summary>
/// <param name="ba">The GZIP data to decompress.</param>
/// <returns>The contents of the in-memory buffer after the copy.</returns>
protected static byte[] decode_GZIP(byte[] ba)
{
    MemoryStream plain = new MemoryStream();
    MemoryStream packed = new MemoryStream(ba);
    using (GZipStream inflater = new GZipStream(packed, CompressionMode.Decompress))
    {
        Utilities.CopyStream(inflater, plain);
    }
    byte[] result = plain.ToArray();
    return result;
}
/// <summary>
/// Compresses a byte array into GZIP format by copying it, via
/// Utilities.CopyStream, into a compressing GZipStream that writes to an
/// in-memory buffer.
/// </summary>
/// <param name="ba">The bytes to compress.</param>
/// <returns>
/// The contents of the in-memory buffer, read after the GZipStream has been
/// disposed.
/// </returns>
protected static byte[] encode_GZIP(byte[] ba)
{
    MemoryStream packed = new MemoryStream();
    using (MemoryStream plain = new MemoryStream(ba))
    using (GZipStream deflater = new GZipStream(packed, CompressionMode.Compress))
    {
        Utilities.CopyStream(plain, deflater);
    }
    // The buffer is read only after the compressing stream has been disposed.
    return packed.ToArray();
}