Your problem is perhaps connected to "sequential" Guid
, like:
c482fbe1-9f16-4ae9-a05c-383478ec9d11
c482fbe1-9f16-4ae9-a05c-383478ec9d12
c482fbe1-9f16-4ae9-a05c-383478ec9d13
c482fbe1-9f16-4ae9-a05c-383478ec9d14
c482fbe1-9f16-4ae9-a05c-383478ec9d15
The Dictionary<,>
has a problem with those, because they often have the same GetHashCode()
, so it has to do some tricks that change the search time from O(1)
to O(n)
... You can solve it by using a custom equality comparer that calculates the hash in a different way, like:
public class ReverseGuidEqualityComparer : IEqualityComparer<Guid>
{
public static readonly ReverseGuidEqualityComparer Default = new ReverseGuidEqualityComparer();
#region IEqualityComparer<Guid> Members
public bool Equals(Guid x, Guid y)
{
return x.Equals(y);
}
public int GetHashCode(Guid obj)
{
var bytes = obj.ToByteArray();
uint hash1 = (uint)bytes[0] | ((uint)bytes[1] << 8) | ((uint)bytes[2] << 16) | ((uint)bytes[3] << 24);
uint hash2 = (uint)bytes[4] | ((uint)bytes[5] << 8) | ((uint)bytes[6] << 16) | ((uint)bytes[7] << 24);
uint hash3 = (uint)bytes[8] | ((uint)bytes[9] << 8) | ((uint)bytes[10] << 16) | ((uint)bytes[11] << 24);
uint hash4 = (uint)bytes[12] | ((uint)bytes[13] << 8) | ((uint)bytes[14] << 16) | ((uint)bytes[15] << 24);
int hash = 37;
unchecked
{
hash = hash * 23 + (int)hash1;
hash = hash * 23 + (int)hash2;
hash = hash * 23 + (int)hash3;
hash = hash * 23 + (int)hash4;
}
return hash;
}
#endregion
}
Then you simply declare the dictionary like this:
var dict = new Dictionary<Guid, Element>(ReverseGuidEqualityComparer.Default);
a little test to see the difference:
private static void Increment(byte[] x)
{
for (int i = x.Length - 1; i >= 0; i--)
{
if (x[i] != 0xFF)
{
x[i]++;
return;
}
x[i] = 0;
}
}
and
// You can try timing this program with the default GetHashCode:
//var dict = new Dictionary<Guid, object>();
var dict = new Dictionary<Guid, object>(ReverseGuidEqualityComparer.Default);
var hs1 = new HashSet<int>();
var hs2 = new HashSet<int>();
{
var guid = Guid.NewGuid();
Stopwatch sw = Stopwatch.StartNew();
for (int i = 0; i < 1500000; i++)
{
hs1.Add(ReverseGuidEqualityComparer.Default.GetHashCode(guid));
hs2.Add(guid.GetHashCode());
dict.Add(guid, new object());
var bytes = guid.ToByteArray();
Increment(bytes);
guid = new Guid(bytes);
}
sw.Stop();
Console.WriteLine("Milliseconds: {0}", sw.ElapsedMilliseconds);
}
Console.WriteLine("ReverseGuidEqualityComparer distinct hashes: {0}", hs1.Count);
Console.WriteLine("Guid.GetHashCode() distinct hashes: {0}", hs2.Count);
With sequential Guid
the difference in the number of distinct hash codes is staggering:
ReverseGuidEqualityComparer distinct hashes: 1500000
Guid.GetHashCode() distinct hashes: 256
Now... If you don't want to use ToByteArray()
(because it allocates useless memory), there is a solution that uses reflection and expression trees... It should work correctly with Mono, because Mono "aligned" its implementation of Guid
to the one of Microsoft in 2004, that is ancient time :-)
public class ReverseGuidEqualityComparer : IEqualityComparer<Guid>
{
public static readonly ReverseGuidEqualityComparer Default = new ReverseGuidEqualityComparer();
public static readonly Func<Guid, int> GetHashCodeFunc;
static ReverseGuidEqualityComparer()
{
var par = Expression.Parameter(typeof(Guid));
var hash = Expression.Variable(typeof(int));
var const23 = Expression.Constant(23);
var const8 = Expression.Constant(8);
var const16 = Expression.Constant(16);
var const24 = Expression.Constant(24);
var b = Expression.Convert(Expression.Convert(Expression.Field(par, "_b"), typeof(ushort)), typeof(uint));
var c = Expression.Convert(Expression.Convert(Expression.Field(par, "_c"), typeof(ushort)), typeof(uint));
var d = Expression.Convert(Expression.Field(par, "_d"), typeof(uint));
var e = Expression.Convert(Expression.Field(par, "_e"), typeof(uint));
var f = Expression.Convert(Expression.Field(par, "_f"), typeof(uint));
var g = Expression.Convert(Expression.Field(par, "_g"), typeof(uint));
var h = Expression.Convert(Expression.Field(par, "_h"), typeof(uint));
var i = Expression.Convert(Expression.Field(par, "_i"), typeof(uint));
var j = Expression.Convert(Expression.Field(par, "_j"), typeof(uint));
var k = Expression.Convert(Expression.Field(par, "_k"), typeof(uint));
var sc = Expression.LeftShift(c, const16);
var se = Expression.LeftShift(e, const8);
var sf = Expression.LeftShift(f, const16);
var sg = Expression.LeftShift(g, const24);
var si = Expression.LeftShift(i, const8);
var sj = Expression.LeftShift(j, const16);
var sk = Expression.LeftShift(k, const24);
var body = Expression.Block(new[]
{
hash
},
new Expression[]
{
Expression.Assign(hash, Expression.Constant(37)),
Expression.MultiplyAssign(hash, const23),
Expression.AddAssign(hash, Expression.Field(par, "_a")),
Expression.MultiplyAssign(hash, const23),
Expression.AddAssign(hash, Expression.Convert(Expression.Or(b, sc), typeof(int))),
Expression.MultiplyAssign(hash, const23),
Expression.AddAssign(hash, Expression.Convert(Expression.Or(d, Expression.Or(se, Expression.Or(sf, sg))), typeof(int))),
Expression.MultiplyAssign(hash, const23),
Expression.AddAssign(hash, Expression.Convert(Expression.Or(h, Expression.Or(si, Expression.Or(sj, sk))), typeof(int))),
hash
});
GetHashCodeFunc = Expression.Lambda<Func<Guid, int>>(body, par).Compile();
}
#region IEqualityComparer<Guid> Members
public bool Equals(Guid x, Guid y)
{
return x.Equals(y);
}
public int GetHashCode(Guid obj)
{
return GetHashCodeFunc(obj);
}
#endregion
// For comparison purpose, not used
public int GetHashCodeSimple(Guid obj)
{
var bytes = obj.ToByteArray();
unchecked
{
int hash = 37;
hash = hash * 23 + (int)((uint)bytes[0] | ((uint)bytes[1] << 8) | ((uint)bytes[2] << 16) | ((uint)bytes[3] << 24));
hash = hash * 23 + (int)((uint)bytes[4] | ((uint)bytes[5] << 8) | ((uint)bytes[6] << 16) | ((uint)bytes[7] << 24));
hash = hash * 23 + (int)((uint)bytes[8] | ((uint)bytes[9] << 8) | ((uint)bytes[10] << 16) | ((uint)bytes[11] << 24));
hash = hash * 23 + (int)((uint)bytes[12] | ((uint)bytes[13] << 8) | ((uint)bytes[14] << 16) | ((uint)bytes[15] << 24));
return hash;
}
}
}
Other solution, based on "undocumented but working" programming (tested on .NET and Mono):
public class ReverseGuidEqualityComparer : IEqualityComparer<Guid>
{
public static readonly ReverseGuidEqualityComparer Default = new ReverseGuidEqualityComparer();
#region IEqualityComparer<Guid> Members
public bool Equals(Guid x, Guid y)
{
return x.Equals(y);
}
public int GetHashCode(Guid obj)
{
GuidToInt32 gtoi = new GuidToInt32 { Guid = obj };
unchecked
{
int hash = 37;
hash = hash * 23 + gtoi.Int32A;
hash = hash * 23 + gtoi.Int32B;
hash = hash * 23 + gtoi.Int32C;
hash = hash * 23 + gtoi.Int32D;
return hash;
}
}
#endregion
[StructLayout(LayoutKind.Explicit)]
private struct GuidToInt32
{
[FieldOffset(0)]
public Guid Guid;
[FieldOffset(0)]
public int Int32A;
[FieldOffset(4)]
public int Int32B;
[FieldOffset(8)]
public int Int32C;
[FieldOffset(12)]
public int Int32D;
}
}
It uses the StructLayout
"trick" to superimpose a Guid
to a bunch of int
, write to the Guid
and read the int
.
Why does Guid.GetHashCode() has problems with sequential ids?
Very easy to explain: from the reference source, the GetHashCode()
is:
return _a ^ (((int)_b << 16) | (int)(ushort)_c) ^ (((int)_f << 24) | _k);
so the _d
, _e
, _g
, _h
, _i
, _j
byte
s aren't part of the hash code. When incremented a Guid
is first incremented in the _k
field (256 values), then on overflow in the _j
field (256 * 256 values, so 65536 values), then on the _i
field (16777216 values). Clearly by not hashing the _h
, _i
, _j
fields the hash of a sequential Guid
will only show 256 different values for non-huge range of Guid
(or at maximum 512 different values, if the _f
field is incremented once, like if you start with a Guid
similar to 12345678-1234-1234-1234-aaffffffff00
, where aa
(that is "our" _f
) will be incremented to ab
after 256 increments of the Guid
)