I've been reading about data-oriented programming in the context of Entity Component Systems. Apparently, using a struct of arrays (SoA) instead of an array of structs (AoS) can make much better use of the CPU cache and give substantial performance increases: if all of the data you're iterating over is laid out contiguously in memory, cache locality should yield a large speedup.
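In JavaScript terms, my understanding of the two layouts is something like this (a minimal sketch; the field names are just examples):

// Array of structs (AoS): one object per entity
let entities = [
  { health: 10, damage: 3 },
  { health: 7, damage: 5 },
];

// Struct of arrays (SoA): one parallel array per field
let fields = {
  health: [10, 7],
  damage: [3, 5],
};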
Because I'll be working in JavaScript, I figured I'd first put together a small benchmark to see how much of a performance increase is possible under ideal conditions. I kept it very simple: the first test measures the speed of iterating over an array of structs, and the second measures the speed of iterating over a struct of arrays.
Here is the code:
function randomInt() { return Math.floor(Math.random() * 100) + 1; }
function randomStr() { return Math.random().toString(36).substring(7); }

let samples = 1000;
let count = 10000000;

function benchmarkArrayOfStructs() {
  // One object per entity; the array holds references to heap-allocated objects.
  let AOS = [];
  for (let i = 0; i < count; i++) {
    AOS.push({ health: randomInt(), name: randomStr(), damage: randomInt() });
  }

  let t1 = performance.now();
  let sum = 0;
  for (let x = 0; x < samples; x++) {
    for (let i = 0; i < AOS.length; i++) {
      let item = AOS[i];
      sum += item.health + item.damage;
    }
  }
  // Log sum as well so the JIT can't dead-code-eliminate the loop.
  console.log(performance.now() - t1, sum);
}

function benchmarkStructOfArrays() {
  // One parallel array per field.
  let SOA = { health: [], name: [], damage: [] };
  for (let i = 0; i < count; i++) {
    SOA.health.push(randomInt());
    SOA.name.push(randomStr());
    SOA.damage.push(randomInt());
  }

  let t2 = performance.now();
  let sum = 0;
  let h = SOA.health;
  let d = SOA.damage;
  for (let x = 0; x < samples; x++) {
    for (let i = 0; i < count; i++) {
      sum += h[i] + d[i];
    }
  }
  // Log sum as well so the JIT can't dead-code-eliminate the loop.
  console.log(performance.now() - t2, sum);
}

benchmarkArrayOfStructs();
benchmarkStructOfArrays();
Interestingly, the SoA version is only about 20% faster than the AoS version. In the various talks I've watched, speakers claim 10x speedups for exactly this kind of operation. Intuitively, I'd also expect the SoA version to be much faster, but it isn't. Now I'm beginning to wonder whether this optimization is even worth integrating into my project, since it severely hurts ergonomics. Have I done something wrong in my benchmark, or is this the expected speedup?
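One variant I haven't tried yet: as far as I understand, plain JS arrays of objects store references, so the AoS loop chases pointers, whereas typed arrays guarantee contiguous, unboxed numeric storage. Here's a sketch of the SoA benchmark rewritten with Float64Array (the function name benchmarkTypedStructOfArrays is my own; it reuses count, samples, and randomInt from above):

function benchmarkTypedStructOfArrays() {
  // Typed arrays guarantee a contiguous, unboxed numeric layout.
  let health = new Float64Array(count);
  let damage = new Float64Array(count);
  for (let i = 0; i < count; i++) {
    health[i] = randomInt();
    damage[i] = randomInt();
  }

  let t3 = performance.now();
  let sum = 0;
  for (let x = 0; x < samples; x++) {
    for (let i = 0; i < count; i++) {
      sum += health[i] + damage[i];
    }
  }
  console.log(performance.now() - t3, sum);
}

benchmarkTypedStructOfArrays();

If this version isn't dramatically faster either, I'd guess the claimed 10x numbers just don't apply to this workload in JavaScript, but I'd like to understand why.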