As I explained in the comment on that SO question, the issue is caused (at least in the case I have discovered) by an optimisation of the string output. It seems that if several strings are the same reference, the serialiser will write the string to the output only once.
So what the sample code does is use a long string for the properties of an object, change the reference of one of the strings, and then serialise. It then deserialises the stream back into an object (and this time, since the string is interned, the same reference is used) and serialises again. This time the stream is smaller.
OK, here is the proof code:
/// <summary>
/// Sample serializable type with three string properties. The demo program
/// serializes an instance of it to compare the resulting stream sizes when
/// the properties hold the same or different string references.
/// </summary>
[Serializable]
public class Proof
{
/// <summary>First string property written to the serialized stream.</summary>
public string S1 { get; set; }
/// <summary>Second string property; the demo gives it an equal value held in a different string instance.</summary>
public string S2 { get; set; }
/// <summary>Third string property written to the serialized stream.</summary>
public string S3 { get; set; }
}
class Program
{
/// <summary>
/// Demonstrates that the size of a BinaryFormatter stream depends on string
/// reference identity, not only on string values: the same object graph is
/// serialized once with two distinct string instances and once with a
/// single shared instance, and both stream lengths are printed.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
/// <remarks>
/// NOTE(review): BinaryFormatter is obsolete and must never be used on
/// untrusted data; it is used here only because its output is the subject
/// of the demonstration.
/// </remarks>
static void Main(string[] args)
{
    const string LongString =
        "A value that is going to change the world nad iasjdsioajdsadj sai sioadj sioadj siopajsa iopsja iosadio jsadiojasd ";

    var proof = new Proof
    {
        S1 = LongString,
        S2 = LongString,
        S3 = LongString
    };

    // Rebuild the same text at run time (first 10 characters + the rest) so
    // that S2 holds an equal value in a different string instance.
    proof.S2 = LongString.Substring(0, 10) + LongString.Substring(10);

    // String == compares values, so all three comparisons print True even
    // though S2 is not the same reference as S1 and S3.
    Console.WriteLine(proof.S1 == proof.S2);
    Console.WriteLine(proof.S1 == proof.S3);
    Console.WriteLine(proof.S2 == proof.S3);
    Console.WriteLine("So the values are all the same...");

    BinaryFormatter bf = new BinaryFormatter();

    // First pass: the graph contains two distinct string instances.
    byte[] buffer;
    using (MemoryStream stream = new MemoryStream())
    {
        bf.Serialize(stream, proof);
        buffer = stream.ToArray();
    }
    Console.WriteLine("buffer length is " + buffer.Length); // 449 on the original author's machine

    // Deserialize from a fresh stream over the captured bytes; no rewinding
    // of the first stream is needed.
    Proof deserProof;
    using (MemoryStream input = new MemoryStream(buffer))
    {
        deserProof = (Proof) bf.Deserialize(input);
    }

    // Point all three properties at one and the same string instance.
    deserProof.S1 = deserProof.S2;
    deserProof.S3 = deserProof.S2;

    // Second pass: the graph now contains a single shared string instance.
    using (MemoryStream stream2 = new MemoryStream())
    {
        new BinaryFormatter().Serialize(stream2, deserProof);
        Console.WriteLine("buffer length now is " + stream2.ToArray().Length); // 333 on the original author's machine
    }

    Console.WriteLine("What? I cannot believe my eyes! Someone help me ........");
    Console.Read(); // keep the console window open until a key is pressed
}