In a desire to speed up a voxel renderer I have been writing, I precomputed the scene into a display list. The scene is a simple 50x50x3 blob of culled voxels, so that's 50*50*2+50*3*4 or 5600 quads. Each quad is associated with a texture so that's 22400 vertices each with a texture coordinate.
The speed difference is incredible, but in the wrong direction: the non-list program renders a frame in well under 16ms, while the display-list program takes around 50ms per frame, fluctuating wildly.
I read https://www.opengl.org/archives/resources/faq/technical/displaylist.htm and it suggested that performance doesn't improve if the program thrashes, as display lists use more memory. However, both versions of my program use a steady 10.0MB as far as I can tell from the system monitor.
I am currently using SDL2, freeglut3, an AMD Athlon(tm) II X4 645 Processor × 4 and its integrated graphics, Gallium 0.4 on AMD RS880, and Debian GNU/Linux 8 (jessie) 64-bit, in case this is a system-specific issue.
Without display lists, I render the scene by calling this code once for every voxel:
void draw_cube_culled(spritesheet * sheet, rect * front, rect * back, rect * left, rect * right, rect * top, rect * bot, float x, float y, float z, float s, renderstate * state) {
glBindTexture(GL_TEXTURE_2D, sheet->sheet_image->texture);
side_t side;
glBegin(GL_QUADS);
glColor3f(1.0f, 1.0f, 1.0f);
//BOT--Y constant
side = BOT;
if(state->side[side]) {
glTexCoord2f(bot->left, bot->bot); glVertex3f(x, y, z);
glTexCoord2f(bot->right, bot->bot); glVertex3f(x+s, y, z);
glTexCoord2f(bot->right, bot->top); glVertex3f(x+s, y, z+s);
glTexCoord2f(bot->left, bot->top); glVertex3f(x, y, z+s);
}
//TOP--Y constant
side = TOP;
if(state->side[side]) {
glTexCoord2f(top->left, top->bot); glVertex3f(x, y+s, z);
glTexCoord2f(top->right, top->bot); glVertex3f(x+s, y+s, z);
glTexCoord2f(top->right, top->top); glVertex3f(x+s, y+s, z+s);
glTexCoord2f(top->left, top->top); glVertex3f(x, y+s, z+s);
}
//LEFT--X constant
side = LEFT;
if(state->side[side]) {
glTexCoord2f(left->left, left->bot); glVertex3f(x, y, z);
glTexCoord2f(left->right, left->bot); glVertex3f(x, y, z+s);
glTexCoord2f(left->right, left->top); glVertex3f(x, y+s, z+s);
glTexCoord2f(left->left, left->top); glVertex3f(x, y+s, z);
}
//RIGHT--X constant
side = RIGHT;
if(state->side[side]) {
glTexCoord2f(right->right, right->bot); glVertex3f(x+s, y, z);
glTexCoord2f(right->left, right->bot); glVertex3f(x+s, y, z+s);
glTexCoord2f(right->left, right->top); glVertex3f(x+s, y+s, z+s);
glTexCoord2f(right->right, right->top); glVertex3f(x+s, y+s, z);
}
//back--Z constant
side = BACK;
if(state->side[side]) {
glTexCoord2f(back->right, back->bot); glVertex3f(x, y, z);
glTexCoord2f(back->left, back->bot); glVertex3f(x+s, y, z);
glTexCoord2f(back->left, back->top); glVertex3f(x+s, y+s, z);
glTexCoord2f(back->right, back->top); glVertex3f(x, y+s, z);
}
//front--Z constant
side = FRONT;
if(state->side[side]) {
glTexCoord2f(front->left, front->bot); glVertex3f(x, y, z+s);
glTexCoord2f(front->right, front->bot); glVertex3f(x+s, y, z+s);
glTexCoord2f(front->right, front->top); glVertex3f(x+s, y+s, z+s);
glTexCoord2f(front->left, front->top); glVertex3f(x, y+s, z+s);
}
glEnd();
}
In order to generate the display lists I call this code once at initialization time (note that draw_diorama is the method I use to draw the scene in the normal code, so it calls the function above 50*50*3 times):
void diorama_compile(diorama * d) {
glNewList(d->compiled,GL_COMPILE);
draw_diorama(d);
glEndList();
}
I am rendering the display list like so:
void draw_compiled_diorama(diorama * d) {
glCallList(d->compiled);
}
This really confuses me, because the non-list version branches at least 6*50*50*3 times per frame, reads from many different places in memory, and does its floating-point math on the CPU, while the display-list version does none of that.