7

can someone tell me why I'm loosing information doing this process ? Some utf8 chars appears not decoded : "Biography":"\u003clink type=... or Steve Blunt \u0026 Marty Kelley but others do : "Name":"朱敬

// Creating a 64bit string containing gzip data
string bar;
using (MemoryStream ms = new MemoryStream())
{
    using (GZipStream gzip = new GZipStream(ms, CompressionMode.Compress))
    using (StreamWriter writer = new StreamWriter(gzip, System.Text.Encoding.UTF8))
    {
        writer.Write(s);
    }
    ms.Flush();
    bar = Convert.ToBase64String(ms.ToArray());
}

// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (MemoryStream dest = new MemoryStream(itemData.Length*2))
{
    gzs.CopyTo(dest);
    foo = Encoding.UTF8.GetString(dest.ToArray());
}

Console.WriteLine(foo);
Stephen Ostermiller
  • 23,933
  • 14
  • 88
  • 109
dekajoo
  • 2,024
  • 1
  • 25
  • 36
  • 1
    There's a semicolon after your first `using` statement that probably doesn't belong there. – Rik May 28 '14 at 09:47
  • 1
    I can't reproduce it with your code, it works fine for me. Are you sure the characters aren't already encoded in the source string? – Rik May 28 '14 at 09:49

2 Answers2

4

It could be because you are writing the string using StreamWriter but reading it using CopyTo() and Encoding.GetString().

What happens if you try this?

// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (StreamReader reader = new StreamReader(gzs, Encoding.UTF8))
{
    foo = reader.ReadLine();
}

Although I think you should be using BinaryReader and BinaryWriter:

string s = "Biography:\u003clink type...";
string bar;
using (MemoryStream ms = new MemoryStream())
{
    using (GZipStream gzip = new GZipStream(ms, CompressionMode.Compress))
    using (var writer = new BinaryWriter(gzip, Encoding.UTF8))
    {
        writer.Write(s);
    }
    ms.Flush();
    bar = Convert.ToBase64String(ms.ToArray());
}

// Reading it
string foo;
byte[] itemData = Convert.FromBase64String(bar);
using (MemoryStream src = new MemoryStream(itemData))
using (GZipStream gzs = new GZipStream(src, CompressionMode.Decompress))
using (var reader = new BinaryReader(gzs, Encoding.UTF8))
{
    foo = reader.ReadString();
}

Console.WriteLine(foo);
Matthew Watson
  • 104,400
  • 10
  • 158
  • 276
  • reader.ReadString --> "the string is prefixed with the length" (at least in VB.NET) Totally messes up my Decompress since I wasn't using BinaryWriter – Fuhrmanator Aug 17 '14 at 02:16
  • @Fuhrmanator Indeed that would mess it up. That's why my example above is careful to use BinaryWriter with BinaryReader. – Matthew Watson Aug 18 '14 at 07:44
0

The issue was simply that the characters were already encoded in the source string.

Ps : Credit goes to rik for this answer :)

Edit : I also had the StreamReader issue matthew-watson was suggesting.

Community
  • 1
  • 1
dekajoo
  • 2,024
  • 1
  • 25
  • 36