Just playing around with some C# code, I found that the time taken to scan an in-memory array depends on the size of its elements: for two collections of the same length but with different object sizes, looping over the collection of bigger objects takes longer.
Testing with LINQPad:
- An array of 20M SmallObject instances takes ~221 ms to loop over.
- An array of 20M BigObject instances takes ~756 ms to loop over.
Why is the time not close to constant? Shouldn't the loop just be doing pointer arithmetic over the array, independent of the element size?
Thanks
public class SmallObject{
    public int JustAnInt0;

    public static SmallObject[] FakeList(int size){
        var res = new SmallObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new SmallObject();
        return res;
    }
}

public class MediumObject{
    public int JustAnInt0;
    public int JustAnInt1;
    public int JustAnInt2;
    public int JustAnInt3;
    public int JustAnInt4;

    public static MediumObject[] FakeList(int size){
        var res = new MediumObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new MediumObject();
        return res;
    }
}

public class BigObject{
    public int JustAnInt0;
    public int JustAnInt1;
    public int JustAnInt2;
    public int JustAnInt3;
    public int JustAnInt4;
    public int JustAnInt5;
    public int JustAnInt6;
    public int JustAnInt7;
    public int JustAnInt8;
    public int JustAnInt9;
    public int JustAnInt10;
    public int JustAnInt11;
    public int JustAnInt12;
    public int JustAnInt13;
    public int JustAnInt14;
    public int JustAnInt15;
    public int JustAnInt16;
    public int JustAnInt17;
    public int JustAnInt18;
    public int JustAnInt19;

    public static BigObject[] FakeList(int size){
        var res = new BigObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new BigObject();
        return res;
    }
}
void Main()
{
    var size = 30000000;
    var small = SmallObject.FakeList(size);
    var medium = MediumObject.FakeList(size);
    var big = BigObject.FakeList(size);

    var sw = System.Diagnostics.Stopwatch.StartNew();
    for(var c = 0; c != size; ++c){
        small[c].JustAnInt0++;   // touch a single int field per element
    }
    string.Format("Scan small list took {0} ms", sw.ElapsedMilliseconds).Dump();

    sw.Restart();
    for(var c = 0; c != size; ++c){
        medium[c].JustAnInt0++;
    }
    string.Format("Scan medium list took {0} ms", sw.ElapsedMilliseconds).Dump();

    sw.Restart();
    for(var c = 0; c != size; ++c){
        big[c].JustAnInt0++;
    }
    string.Format("Scan big list took {0} ms", sw.ElapsedMilliseconds).Dump();
}
// Define other methods and classes here
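For what it's worth, the per-object heap size can be estimated by comparing GC.GetTotalMemory before and after a bulk allocation. This is a minimal sketch I put together (the helper name is mine, and the numbers it prints are approximate and depend on runtime and bitness):

static long ApproxBytesPerObject<T>(Func<T> make, int count) where T : class
{
    var keep = new T[count];                 // hold references so nothing is collected
    long before = GC.GetTotalMemory(true);   // force a full collection first
    for(var c = 0; c != count; ++c)
        keep[c] = make();
    long after = GC.GetTotalMemory(true);
    GC.KeepAlive(keep);
    return (after - before) / count;         // includes object header and padding
}

Calling ApproxBytesPerObject(() => new SmallObject(), 1000000) versus the BigObject equivalent should show something in the neighborhood of 24 vs 96 bytes per instance on 64-bit.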
UPDATE:
In this case @IanMercer's comment, plus @erisco's, pointed me in the right direction, and after tweaking the objects a bit I get the expected behavior. Basically, I wrapped the extra data in a separate object behind a reference, so the small, medium and big objects being scanned all have roughly the same size, small enough to play nicely with the CPU caches. Now the test shows the same times for all three.
public class SmallObject{
    public int JustAnInt0;

    public static SmallObject[] FakeList(int size){
        var res = new SmallObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new SmallObject();
        return res;
    }
}
public class MediumObject{
    public int JustAnInt0;

    // the "cold" fields now live behind a reference; FakeList never
    // allocates them, so each MediumObject itself stays small
    public class Extra{
        public int JustAnInt1;
        public int JustAnInt2;
        public int JustAnInt3;
        public int JustAnInt4;
    }
    public Extra ExtraData;

    public static MediumObject[] FakeList(int size){
        var res = new MediumObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new MediumObject();
        return res;
    }
}

public class BigObject{
    public int JustAnInt0;

    // same idea: the 20 extra ints are moved behind a reference
    public class Extra{
        public int JustAnInt1;
        public int JustAnInt2;
        public int JustAnInt3;
        public int JustAnInt4;
        public int JustAnInt5;
        public int JustAnInt6;
        public int JustAnInt7;
        public int JustAnInt8;
        public int JustAnInt9;
        public int JustAnInt10;
        public int JustAnInt11;
        public int JustAnInt12;
        public int JustAnInt13;
        public int JustAnInt14;
        public int JustAnInt15;
        public int JustAnInt16;
        public int JustAnInt17;
        public int JustAnInt18;
        public int JustAnInt19;
    }
    public Extra ExtraData;

    public static BigObject[] FakeList(int size){
        var res = new BigObject[size];
        for(var c = 0; c != size; ++c)
            res[c] = new BigObject();
        return res;
    }
}
void Main()
{
    var size = 30000000;
    var small = SmallObject.FakeList(size);
    var medium = MediumObject.FakeList(size);
    var big = BigObject.FakeList(size);

    // run each scan 10 times and average to smooth out noise
    var times = Enumerable
        .Range(0, 10)
        .Select(r => {
            var sw = System.Diagnostics.Stopwatch.StartNew();
            for(var c = 0; c != size; ++c){
                small[c].JustAnInt0++;
            }
            var smalltt = sw.ElapsedMilliseconds;

            sw.Restart();
            for(var c = 0; c != size; ++c){
                big[c].JustAnInt0++;
            }
            var bigtt = sw.ElapsedMilliseconds;

            sw.Restart();
            for(var c = 0; c != size; ++c){
                medium[c].JustAnInt0++;
            }
            var mediumtt = sw.ElapsedMilliseconds;

            return new {
                smalltt,
                mediumtt,
                bigtt
            };
        })
        .ToArray();

    (new {
        Small = times.Average(t => t.smalltt),
        Medium = times.Average(t => t.mediumtt),
        Big = times.Average(t => t.bigtt)
    }).Dump();
}
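As a back-of-the-envelope check of the cache explanation (the byte counts here are assumptions based on typical 64-bit CLR object layout, not measurements):

// With a 64-byte cache line, sequentially allocated 24-byte objects pack
// almost 3 per line, while 96-byte objects each span more than one line,
// so the big scan has to pull roughly 4x as many lines from memory.
const int cacheLine = 64;
const int smallSize = 24;   // 16-byte header + 1 int, padded up
const int bigSize   = 96;   // 16-byte header + 20 ints (80 bytes)
Console.WriteLine("Cache lines per 1M small objects: {0}", 1000000L * smallSize / cacheLine);  // 375000
Console.WriteLine("Cache lines per 1M big objects:   {0}", 1000000L * bigSize / cacheLine);    // 1500000

That roughly 4x ratio lines up reasonably well with the ~221 ms vs ~756 ms timings above.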
Some useful links:
- http://igoro.com/archive/gallery-of-processor-cache-effects/
- https://msdn.microsoft.com/en-us/library/ms973852.aspx
- https://technet.microsoft.com/en-us/sysinternals/cc835722.aspx
Thank you all!