I was playing around with a simple "game" to test different aspects of Data Oriented Design when I stumble across this strange performance drop.
I have this struct to store data of the game ships:
constexpr int MAX_ENEMY_SHIPS = 4000000;
struct Ships
{
int32_t count;
v2 pos[MAX_ENEMY_SHIPS];
ShipMovement movements[MAX_ENEMY_SHIPS];
ShipDrawing drawings[MAX_ENEMY_SHIPS];
//ShipOtherData other[MAX_ENEMY_SHIPS];
void Add(Ship ship)
{
pos[count] = ship.pos;
movements[count] = { ship.dir, ship.speed };
drawings[count] = { ship.size, ship.color };
//other[count] = { ship.a, ship.b, ship.c, ship.d };
count++;
}
};
Then I have a function to update the movement data:
void MoveShips(v2* positions, ShipMovement* movements, int count)
{
ScopeBenchmark bench("Move Ships");
for(int i = 0; i < count; ++i)
{
positions[i] = positions[i] + (movements[i].dir * movements[i].speed);
}
}
My understanding is that, as the MoveShips function only uses the position and movement arrays, extra memory in the Ships struct will not affect its performance. However, when I uncomment the commented lines on the Ships struct, the performance drops a lot. With the current value of MAX_ENEMY_SHIPS, the duration of the MoveShips function in my computer goes from 10-11 ms to 200-210 ms.
Here I let a minimum, reproducible example:
#include <stdlib.h>
#include <stdio.h>
#include <chrono>
#include <string>
class ScopeBenchmark
{
public:
ScopeBenchmark(std::string text)
: text(text)
{
begin = std::chrono::steady_clock::now();
}
~ScopeBenchmark()
{
std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
printf("%s: %lli\n", text.data(), std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count());
}
private:
std::string text;
std::chrono::steady_clock::time_point begin;
};
constexpr int32_t Color(uint8_t r, uint8_t g, uint8_t b)
{
return (r << 16) | (g << 8) | b;
}
struct v2
{
float x;
float y;
};
inline v2 operator+(v2 a, v2 b)
{
v2 result;
result.x = a.x + b.x;
result.y = a.y + b.y;
return result;
}
inline v2 operator*(v2 a, float b)
{
v2 result;
result.x = a.x * b;
result.y = a.y * b;
return result;
}
//----------------------------------------------------------------------
struct Ship
{
v2 pos;
v2 size;
v2 dir;
float speed;
int32_t color;
v2 a;
v2 b;
v2 c;
v2 d;
};
struct ShipMovement
{
v2 dir;
float speed;
};
struct ShipDrawing
{
v2 size;
int32_t color;
};
struct ShipOtherData
{
v2 a;
v2 b;
v2 c;
v2 d;
};
constexpr int MAX_ENEMY_SHIPS = 4000000;
struct Ships
{
int32_t count;
v2 pos[MAX_ENEMY_SHIPS];
ShipMovement movements[MAX_ENEMY_SHIPS];
ShipDrawing drawings[MAX_ENEMY_SHIPS];
//ShipOtherData other[MAX_ENEMY_SHIPS];
void Add(Ship ship)
{
pos[count] = ship.pos;
movements[count] = { ship.dir, ship.speed };
drawings[count] = { ship.size, ship.color };
//other[count] = { ship.a, ship.b, ship.c, ship.d };
count++;
}
};
void MoveShips(v2* positions, ShipMovement* movements, int count)
{
ScopeBenchmark bench("Move Ships");
for(int i = 0; i < count; ++i)
{
positions[i] = positions[i] + (movements[i].dir * movements[i].speed);
}
}
struct Game
{
int32_t playerShipIndex;
Ships ships;
};
void InitGame(void* gameMemory)
{
Game* game = (Game*)gameMemory;
Ship ship;
ship.pos = { 0.0f, 0.0f };
ship.size = { 100.0f, 100.0f };
ship.speed = 1.0f;
ship.color = Color(64, 192, 32);
game->ships.Add(ship);
game->playerShipIndex = 0;
ship.speed *= 0.5f;
ship.dir.x = -1.0f;
ship.size = { 50.0f, 50.0f };
ship.color = Color(192, 64, 32);
for(int i = 0; i < MAX_ENEMY_SHIPS; i++)
{
ship.pos = { 500.0f, 350.0f };
game->ships.Add(ship);
}
}
int main()
{
Game* game = (Game*)malloc(sizeof(Game));
memset(game, 0, sizeof(Game));
InitGame(game);
while (true)
{
MoveShips(game->ships.pos, game->ships.movements, game->ships.count);
}
}
I use the Visual Studio compiler, and compile the file with the following command:
cl.exe /O2 /GL src/Game.cpp
So, my question is: why the performance of the MoveShips function drops so hard when adding memory that is not used?