Add vao cache for static surfaces.

Remove support for draw range elements, multi draw arrays, world vao creation, surface merging.
This commit is contained in:
SmileTheory 2017-04-28 02:13:25 -07:00
parent 127464ed19
commit c65d2c2657
12 changed files with 413 additions and 730 deletions

View file

@ -379,19 +379,11 @@ extern void (APIENTRYP qglUnlockArraysEXT) (void);
// GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
// OpenGL 1.2, was GL_EXT_draw_range_elements
#define QGL_1_2_PROCS \
GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \
// OpenGL 1.3, was GL_ARB_texture_compression
#define QGL_1_3_PROCS \
GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \
GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \
// OpenGL 1.4, was GL_EXT_multi_draw_arrays
#define QGL_1_4_PROCS \
GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \
// OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
#define QGL_1_5_PROCS \
GLE(void, GenQueries, GLsizei n, GLuint *ids) \
@ -640,9 +632,7 @@ extern void (APIENTRYP qglUnlockArraysEXT) (void);
GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \
#define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name;
QGL_1_2_PROCS;
QGL_1_3_PROCS;
QGL_1_4_PROCS;
QGL_1_5_PROCS;
QGL_2_0_PROCS;
QGL_EXT_framebuffer_object_PROCS;

View file

@ -1680,428 +1680,6 @@ void R_MovePatchSurfacesToHunk(void) {
}
/*
=================
BSPSurfaceCompare
compare function for qsort()
=================
*/
static int BSPSurfaceCompare(const void *a, const void *b)
{
msurface_t *aa, *bb;
aa = *(msurface_t **) a;
bb = *(msurface_t **) b;
// shader first
if(aa->shader->sortedIndex < bb->shader->sortedIndex)
return -1;
else if(aa->shader->sortedIndex > bb->shader->sortedIndex)
return 1;
// by fogIndex
if(aa->fogIndex < bb->fogIndex)
return -1;
else if(aa->fogIndex > bb->fogIndex)
return 1;
// by cubemapIndex
if(aa->cubemapIndex < bb->cubemapIndex)
return -1;
else if(aa->cubemapIndex > bb->cubemapIndex)
return 1;
// by leaf
if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] < s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
return -1;
else if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] > s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
return 1;
// by surface number
if (aa < bb)
return -1;
else if (aa > bb)
return 1;
return 0;
}
/*
 * Copies every vertex attribute of *in into *out, one field at a time:
 * position, texture and lightmap coordinates, and the four-component
 * normal, tangent, light direction and color.
 */
static void CopyVert(const srfVert_t *in, srfVert_t *out)
{
	// position
	VectorCopy(in->xyz, out->xyz);

	// two-component coordinates
	VectorCopy2(in->st, out->st);
	VectorCopy2(in->lightmap, out->lightmap);

	// four-component attributes
	VectorCopy4(in->normal, out->normal);
	VectorCopy4(in->tangent, out->tangent);
	VectorCopy4(in->lightdir, out->lightdir);
	VectorCopy4(in->color, out->color);
}
/*
===============
R_CreateWorldVaos
===============
*/
static void R_CreateWorldVaos(void)
{
int i, j, k;
int numVerts;
srfVert_t *verts;
int numIndexes;
glIndex_t *indexes;
int numSortedSurfaces, numSurfaces;
msurface_t *surface, **firstSurf, **lastSurf, **currSurf;
msurface_t **surfacesSorted;
vao_t *vao;
int maxVboSize = 4 * 1024 * 1024;
int startTime, endTime;
startTime = ri.Milliseconds();
// mark surfaces with best matching leaf, using overlapping bounds
// using surfaceViewCount[] as leaf number, and surfacesDlightBits[] as coverage * 256
for (i = 0; i < s_worldData.numWorldSurfaces; i++)
{
s_worldData.surfacesViewCount[i] = -1;
}
for (i = 0; i < s_worldData.numWorldSurfaces; i++)
{
s_worldData.surfacesDlightBits[i] = 0;
}
for (i = s_worldData.numDecisionNodes; i < s_worldData.numnodes; i++)
{
mnode_t *leaf = s_worldData.nodes + i;
for (j = leaf->firstmarksurface; j < leaf->firstmarksurface + leaf->nummarksurfaces; j++)
{
int surfaceNum = s_worldData.marksurfaces[j];
msurface_t *surface = s_worldData.surfaces + surfaceNum;
float coverage = 1.0f;
int iCoverage;
for (k = 0; k < 3; k++)
{
float left, right;
if (leaf->mins[k] > surface->cullinfo.bounds[1][k] || surface->cullinfo.bounds[0][k] > leaf->maxs[k])
{
coverage = 0.0f;
break;
}
left = MAX(leaf->mins[k], surface->cullinfo.bounds[0][k]);
right = MIN(leaf->maxs[k], surface->cullinfo.bounds[1][k]);
// nudge a bit in case this is an axis aligned wall
coverage *= right - left + 1.0f/256.0f;
}
iCoverage = coverage * 256;
if (iCoverage > s_worldData.surfacesDlightBits[surfaceNum])
{
s_worldData.surfacesDlightBits[surfaceNum] = iCoverage;
s_worldData.surfacesViewCount[surfaceNum] = i - s_worldData.numDecisionNodes;
}
}
}
for (i = 0; i < s_worldData.numWorldSurfaces; i++)
{
s_worldData.surfacesDlightBits[i] = 0;
}
// count surfaces
numSortedSurfaces = 0;
for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
{
srfBspSurface_t *bspSurf;
shader_t *shader = surface->shader;
if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
continue;
// check for this now so we can use srfBspSurface_t* universally in the rest of the function
if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
continue;
bspSurf = (srfBspSurface_t *) surface->data;
if (!bspSurf->numIndexes || !bspSurf->numVerts)
continue;
numSortedSurfaces++;
}
// presort surfaces
surfacesSorted = ri.Malloc(numSortedSurfaces * sizeof(*surfacesSorted));
j = 0;
for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
{
srfBspSurface_t *bspSurf;
shader_t *shader = surface->shader;
if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
continue;
// check for this now so we can use srfBspSurface_t* universally in the rest of the function
if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
continue;
bspSurf = (srfBspSurface_t *) surface->data;
if (!bspSurf->numIndexes || !bspSurf->numVerts)
continue;
surfacesSorted[j++] = surface;
}
qsort(surfacesSorted, numSortedSurfaces, sizeof(*surfacesSorted), BSPSurfaceCompare);
k = 0;
for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
{
int currVboSize;
// Find range of surfaces to place in a VAO by:
// - Collecting a number of surfaces which fit under maxVboSize, or
// - All the surfaces with a single shader which go over maxVboSize
currVboSize = 0;
while (currVboSize < maxVboSize && lastSurf < surfacesSorted + numSortedSurfaces)
{
int addVboSize, currShaderIndex;
addVboSize = 0;
currShaderIndex = (*lastSurf)->shader->sortedIndex;
for(currSurf = lastSurf; currSurf < surfacesSorted + numSortedSurfaces && (*currSurf)->shader->sortedIndex == currShaderIndex; currSurf++)
{
srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
addVboSize += bspSurf->numVerts * sizeof(srfVert_t);
}
if (currVboSize != 0 && addVboSize + currVboSize > maxVboSize)
break;
lastSurf = currSurf;
currVboSize += addVboSize;
}
// count verts/indexes/surfaces
numVerts = 0;
numIndexes = 0;
numSurfaces = 0;
for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
{
srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
numVerts += bspSurf->numVerts;
numIndexes += bspSurf->numIndexes;
numSurfaces++;
}
ri.Printf(PRINT_ALL, "...calculating world VAO %d ( %i verts %i tris )\n", k, numVerts, numIndexes / 3);
// create arrays
verts = ri.Hunk_AllocateTempMemory(numVerts * sizeof(srfVert_t));
indexes = ri.Hunk_AllocateTempMemory(numIndexes * sizeof(glIndex_t));
// set up indices and copy vertices
numVerts = 0;
numIndexes = 0;
for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
{
srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
glIndex_t *surfIndex;
bspSurf->firstIndex = numIndexes;
bspSurf->minIndex = numVerts + bspSurf->indexes[0];
bspSurf->maxIndex = numVerts + bspSurf->indexes[0];
for(i = 0, surfIndex = bspSurf->indexes; i < bspSurf->numIndexes; i++, surfIndex++)
{
indexes[numIndexes++] = numVerts + *surfIndex;
bspSurf->minIndex = MIN(bspSurf->minIndex, numVerts + *surfIndex);
bspSurf->maxIndex = MAX(bspSurf->maxIndex, numVerts + *surfIndex);
}
bspSurf->firstVert = numVerts;
for(i = 0; i < bspSurf->numVerts; i++)
{
CopyVert(&bspSurf->verts[i], &verts[numVerts++]);
}
}
vao = R_CreateVao2(va("staticBspModel%i_VAO", k), numVerts, verts, numIndexes, indexes);
// point bsp surfaces to VAO
for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
{
srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
bspSurf->vao = vao;
}
ri.Hunk_FreeTempMemory(indexes);
ri.Hunk_FreeTempMemory(verts);
k++;
}
if (r_mergeLeafSurfaces->integer)
{
msurface_t *mergedSurf;
// count merged surfaces
int numMergedSurfaces = 0, numUnmergedSurfaces = 0;
for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
{
for (lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
{
int lastSurfLeafIndex, firstSurfLeafIndex;
if ((*lastSurf)->shader != (*firstSurf)->shader
|| (*lastSurf)->fogIndex != (*firstSurf)->fogIndex
|| (*lastSurf)->cubemapIndex != (*firstSurf)->cubemapIndex)
break;
lastSurfLeafIndex = s_worldData.surfacesViewCount[*lastSurf - s_worldData.surfaces];
firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
if (lastSurfLeafIndex != firstSurfLeafIndex)
break;
}
// don't merge single surfaces
if (firstSurf + 1 == lastSurf)
{
numUnmergedSurfaces++;
continue;
}
numMergedSurfaces++;
}
// Allocate merged surfaces
s_worldData.mergedSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfaces) * numMergedSurfaces, h_low);
s_worldData.mergedSurfacesViewCount = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesViewCount) * numMergedSurfaces, h_low);
s_worldData.mergedSurfacesDlightBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesDlightBits) * numMergedSurfaces, h_low);
s_worldData.mergedSurfacesPshadowBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesPshadowBits) * numMergedSurfaces, h_low);
s_worldData.numMergedSurfaces = numMergedSurfaces;
// view surfaces are like mark surfaces, except negative ones represent merged surfaces
// -1 represents 0, -2 represents 1, and so on
s_worldData.viewSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.viewSurfaces) * s_worldData.nummarksurfaces, h_low);
// actually merge surfaces
mergedSurf = s_worldData.mergedSurfaces;
for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
{
srfBspSurface_t *bspSurf, *vaoSurf;
for ( lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
{
int lastSurfLeafIndex, firstSurfLeafIndex;
if ((*lastSurf)->shader != (*firstSurf)->shader
|| (*lastSurf)->fogIndex != (*firstSurf)->fogIndex
|| (*lastSurf)->cubemapIndex != (*firstSurf)->cubemapIndex)
break;
lastSurfLeafIndex = s_worldData.surfacesViewCount[*lastSurf - s_worldData.surfaces];
firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
if (lastSurfLeafIndex != firstSurfLeafIndex)
break;
}
// don't merge single surfaces
if (firstSurf + 1 == lastSurf)
continue;
bspSurf = (srfBspSurface_t *)(*firstSurf)->data;
vaoSurf = ri.Hunk_Alloc(sizeof(*vaoSurf), h_low);
memset(vaoSurf, 0, sizeof(*vaoSurf));
vaoSurf->surfaceType = SF_VAO_MESH;
vaoSurf->vao = bspSurf->vao;
vaoSurf->firstIndex = bspSurf->firstIndex;
vaoSurf->minIndex = bspSurf->minIndex;
vaoSurf->maxIndex = bspSurf->maxIndex;
ClearBounds(vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
{
srfBspSurface_t *currBspSurf = (srfBspSurface_t *)(*currSurf)->data;
vaoSurf->numVerts += currBspSurf->numVerts;
vaoSurf->numIndexes += currBspSurf->numIndexes;
vaoSurf->minIndex = MIN(vaoSurf->minIndex, currBspSurf->minIndex);
vaoSurf->maxIndex = MAX(vaoSurf->maxIndex, currBspSurf->maxIndex);
AddPointToBounds((*currSurf)->cullinfo.bounds[0], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
AddPointToBounds((*currSurf)->cullinfo.bounds[1], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
}
VectorCopy(vaoSurf->cullBounds[0], mergedSurf->cullinfo.bounds[0]);
VectorCopy(vaoSurf->cullBounds[1], mergedSurf->cullinfo.bounds[1]);
mergedSurf->cullinfo.type = CULLINFO_BOX;
mergedSurf->data = (surfaceType_t *)vaoSurf;
mergedSurf->fogIndex = (*firstSurf)->fogIndex;
mergedSurf->cubemapIndex = (*firstSurf)->cubemapIndex;
mergedSurf->shader = (*firstSurf)->shader;
// change surfacesViewCount[] from leaf index to viewSurface index - 1 so we can redirect later
// subtracting 2 (viewSurface index - 1) to avoid collision with -1 (no leaf)
for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
s_worldData.surfacesViewCount[*currSurf - s_worldData.surfaces] = -((int)(mergedSurf - s_worldData.mergedSurfaces)) - 2;
mergedSurf++;
}
// direct viewSurfaces to merged and unmerged surfaces
for (i = 0; i < s_worldData.nummarksurfaces; i++)
{
int viewSurfaceIndex = s_worldData.surfacesViewCount[s_worldData.marksurfaces[i]] + 1;
s_worldData.viewSurfaces[i] = (viewSurfaceIndex < 0) ? viewSurfaceIndex : s_worldData.marksurfaces[i];
}
ri.Printf(PRINT_ALL, "Processed %d mergeable surfaces into %d merged, %d unmerged\n",
numSortedSurfaces, numMergedSurfaces, numUnmergedSurfaces);
}
for (i = 0; i < s_worldData.numWorldSurfaces; i++)
s_worldData.surfacesViewCount[i] = -1;
ri.Free(surfacesSorted);
endTime = ri.Milliseconds();
ri.Printf(PRINT_ALL, "world VAOs calculation time = %5.2f seconds\n", (endTime - startTime) / 1000.0);
}
/*
===============
R_LoadSurfaces
@ -3420,9 +2998,6 @@ void RE_LoadWorldMap( const char *name ) {
}
}
// create static VAOS from the world
R_CreateWorldVaos();
s_worldData.dataSize = (byte *)ri.Hunk_Alloc(0, h_low) - startMarker;
// only set tr.world now that we know the entire level has loaded properly

View file

@ -66,8 +66,8 @@ void R_PerformanceCounters( void ) {
}
else if (r_speeds->integer == 7 )
{
ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\nMultidraws: %i merged %i\n",
backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws, backEnd.pc.c_multidraws, backEnd.pc.c_multidrawsMerged );
ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\n",
backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws);
ri.Printf( PRINT_ALL, "GLSL binds: %i draws: gen %i light %i fog %i dlight %i\n",
backEnd.pc.c_glslShaderBinds, backEnd.pc.c_genericDraws, backEnd.pc.c_lightallDraws, backEnd.pc.c_fogDraws, backEnd.pc.c_dlightDraws);
}

View file

@ -31,9 +31,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
#include "tr_dsa.h"
#define GLE(ret, name, ...) name##proc * qgl##name;
QGL_1_2_PROCS;
QGL_1_3_PROCS;
QGL_1_4_PROCS;
QGL_1_5_PROCS;
QGL_2_0_PROCS;
QGL_EXT_framebuffer_object_PROCS;
@ -67,19 +65,9 @@ void GLimp_InitExtraExtensions()
// GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
#define GLE(ret, name, ...) qgl##name = (name##proc *) SDL_GL_GetProcAddress("gl" #name);
// OpenGL 1.2, was GL_EXT_draw_range_elements
QGL_1_2_PROCS;
glRefConfig.drawRangeElements = !!r_ext_draw_range_elements->integer;
ri.Printf(PRINT_ALL, result[glRefConfig.drawRangeElements], "glDrawRangeElements()");
// OpenGL 1.3, was GL_ARB_texture_compression
QGL_1_3_PROCS;
// OpenGL 1.4, was GL_EXT_multi_draw_arrays
QGL_1_4_PROCS;
glRefConfig.multiDrawArrays = !!r_ext_multi_draw_arrays->integer;
ri.Printf(PRINT_ALL, result[glRefConfig.multiDrawArrays], "glMultiDrawElements()");
// OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
QGL_1_5_PROCS;
glRefConfig.occlusionQuery = qtrue;

View file

@ -98,8 +98,6 @@ cvar_t *r_ext_texture_env_add;
cvar_t *r_ext_texture_filter_anisotropic;
cvar_t *r_ext_max_anisotropy;
cvar_t *r_ext_draw_range_elements;
cvar_t *r_ext_multi_draw_arrays;
cvar_t *r_ext_framebuffer_object;
cvar_t *r_ext_texture_float;
cvar_t *r_ext_framebuffer_multisample;
@ -1158,8 +1156,6 @@ void R_Register( void )
r_ext_compiled_vertex_array = ri.Cvar_Get( "r_ext_compiled_vertex_array", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_texture_env_add = ri.Cvar_Get( "r_ext_texture_env_add", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_draw_range_elements = ri.Cvar_Get( "r_ext_draw_range_elements", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_multi_draw_arrays = ri.Cvar_Get( "r_ext_multi_draw_arrays", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_framebuffer_object = ri.Cvar_Get( "r_ext_framebuffer_object", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_texture_float = ri.Cvar_Get( "r_ext_texture_float", "1", CVAR_ARCHIVE | CVAR_LATCH);
r_ext_framebuffer_multisample = ri.Cvar_Get( "r_ext_framebuffer_multisample", "0", CVAR_ARCHIVE | CVAR_LATCH);

View file

@ -929,8 +929,6 @@ typedef struct srfBspSurface_s
// BSP VBO offsets
int firstVert;
int firstIndex;
glIndex_t minIndex;
glIndex_t maxIndex;
// static render data
vao_t *vao;
@ -1005,8 +1003,6 @@ typedef struct srfVaoMdvMesh_s
// backEnd stats
int numIndexes;
int numVerts;
glIndex_t minIndex;
glIndex_t maxIndex;
// static render data
vao_t *vao;
@ -1135,15 +1131,8 @@ typedef struct {
int *surfacesDlightBits;
int *surfacesPshadowBits;
int numMergedSurfaces;
msurface_t *mergedSurfaces;
int *mergedSurfacesViewCount;
int *mergedSurfacesDlightBits;
int *mergedSurfacesPshadowBits;
int nummarksurfaces;
int *marksurfaces;
int *viewSurfaces;
int numfogs;
fog_t *fogs;
@ -1375,8 +1364,6 @@ typedef struct {
qboolean intelGraphics;
qboolean drawRangeElements;
qboolean multiDrawArrays;
qboolean occlusionQuery;
int glslMajorVersion;
@ -1415,9 +1402,6 @@ typedef struct {
int c_staticVaoDraws;
int c_dynamicVaoDraws;
int c_multidraws;
int c_multidrawsMerged;
int c_dlightVertexes;
int c_dlightIndexes;
@ -1689,8 +1673,6 @@ extern cvar_t *r_showcluster;
extern cvar_t *r_gamma;
extern cvar_t *r_displayRefresh; // optional display refresh option
extern cvar_t *r_ext_draw_range_elements;
extern cvar_t *r_ext_multi_draw_arrays;
extern cvar_t *r_ext_framebuffer_object;
extern cvar_t *r_ext_texture_float;
extern cvar_t *r_ext_framebuffer_multisample;
@ -1977,8 +1959,6 @@ typedef struct stageVars
vec2_t texcoords[NUM_TEXTURE_BUNDLES][SHADER_MAX_VERTEXES];
} stageVars_t;
#define MAX_MULTIDRAW_PRIMITIVES 256
typedef struct shaderCommands_s
{
glIndex_t indexes[SHADER_MAX_INDEXES] QALIGN(16);
@ -1994,6 +1974,7 @@ typedef struct shaderCommands_s
void *attribPointers[ATTR_INDEX_COUNT];
vao_t *vao;
qboolean useInternalVao;
qboolean useCacheVao;
stageVars_t svars QALIGN(16);
@ -2010,14 +1991,6 @@ typedef struct shaderCommands_s
int firstIndex;
int numIndexes;
int numVertexes;
glIndex_t minIndex;
glIndex_t maxIndex;
int multiDrawPrimitives;
GLsizei multiDrawNumIndexes[MAX_MULTIDRAW_PRIMITIVES];
glIndex_t *multiDrawFirstIndex[MAX_MULTIDRAW_PRIMITIVES];
glIndex_t multiDrawMinIndex[MAX_MULTIDRAW_PRIMITIVES];
glIndex_t multiDrawMaxIndex[MAX_MULTIDRAW_PRIMITIVES];
// info extracted from current shader
int numPasses;
@ -2032,7 +2005,7 @@ void RB_EndSurface(void);
void RB_CheckOverflow( int verts, int indexes );
#define RB_CHECKOVERFLOW(v,i) if (tess.numVertexes + (v) >= SHADER_MAX_VERTEXES || tess.numIndexes + (i) >= SHADER_MAX_INDEXES ) {RB_CheckOverflow(v,i);}
void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex );
void R_DrawElements( int numIndexes, glIndex_t firstIndex );
void RB_StageIteratorGeneric( void );
void RB_StageIteratorSky( void );
void RB_StageIteratorVertexLitTexture( void );
@ -2171,6 +2144,14 @@ void R_VaoList_f(void);
void RB_UpdateTessVao(unsigned int attribBits);
void VaoCache_Commit(void);
void VaoCache_Init(void);
void VaoCache_BindVao(void);
void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes);
void VaoCache_RecycleVertexBuffer(void);
void VaoCache_RecycleIndexBuffer(void);
void VaoCache_InitQueue(void);
void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes);
/*
============================================================

View file

@ -764,9 +764,6 @@ static qboolean R_LoadMD3(model_t * mod, int lod, void *buffer, int bufferSize,
vaoSurf->numIndexes = surf->numIndexes;
vaoSurf->numVerts = surf->numVerts;
vaoSurf->minIndex = 0;
vaoSurf->maxIndex = surf->numVerts - 1;
vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC);
vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;

View file

@ -41,42 +41,9 @@ R_DrawElements
==================
*/
void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex )
void R_DrawElements( int numIndexes, glIndex_t firstIndex)
{
if (glRefConfig.drawRangeElements)
qglDrawRangeElements(GL_TRIANGLES, minIndex, maxIndex, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
else
qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
}
static void R_DrawMultiElementsVao( int multiDrawPrimitives, glIndex_t *multiDrawMinIndex, glIndex_t *multiDrawMaxIndex,
GLsizei *multiDrawNumIndexes, glIndex_t **multiDrawFirstIndex)
{
if (glRefConfig.multiDrawArrays && multiDrawPrimitives > 1)
{
qglMultiDrawElements(GL_TRIANGLES, multiDrawNumIndexes, GL_INDEX_TYPE, (const GLvoid **)multiDrawFirstIndex, multiDrawPrimitives);
}
else
{
int i;
if (glRefConfig.drawRangeElements)
{
for (i = 0; i < multiDrawPrimitives; i++)
{
qglDrawRangeElements(GL_TRIANGLES, multiDrawMinIndex[i], multiDrawMaxIndex[i], multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
}
}
else
{
for (i = 0; i < multiDrawPrimitives; i++)
{
qglDrawElements(GL_TRIANGLES, multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
}
}
}
qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
}
@ -149,14 +116,7 @@ static void DrawTris (shaderCommands_t *input) {
VectorSet4(color, 1, 1, 1, 1);
GLSL_SetUniformVec4(sp, UNIFORM_COLOR, color);
if (input->multiDrawPrimitives)
{
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
}
R_DrawElements(input->numIndexes, input->firstIndex);
}
qglDepthRange( 0, 1 );
@ -190,7 +150,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
tess.numIndexes = 0;
tess.firstIndex = 0;
tess.numVertexes = 0;
tess.multiDrawPrimitives = 0;
tess.shader = state;
tess.fogNum = fogNum;
tess.cubemapIndex = cubemapIndex;
@ -200,6 +159,7 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
tess.numPasses = state->numUnfoggedPasses;
tess.currentStageIteratorFunc = state->optimalStageIteratorFunc;
tess.useInternalVao = qtrue;
tess.useCacheVao = qfalse;
tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
if (tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) {
@ -423,15 +383,7 @@ static void ProjectDlightTexture( void ) {
GL_State( GLS_ATEST_GT_0 | GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
}
if (tess.multiDrawPrimitives)
{
shaderCommands_t *input = &tess;
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
}
R_DrawElements(tess.numIndexes, tess.firstIndex);
backEnd.pc.c_totalIndexes += tess.numIndexes;
backEnd.pc.c_dlightIndexes += tess.numIndexes;
@ -844,14 +796,7 @@ static void ForwardDlight( void ) {
// draw
//
if (input->multiDrawPrimitives)
{
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
}
R_DrawElements(input->numIndexes, input->firstIndex);
backEnd.pc.c_totalIndexes += tess.numIndexes;
backEnd.pc.c_dlightIndexes += tess.numIndexes;
@ -920,14 +865,7 @@ static void ProjectPshadowVBOGLSL( void ) {
// draw
//
if (input->multiDrawPrimitives)
{
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
}
R_DrawElements(input->numIndexes, input->firstIndex);
backEnd.pc.c_totalIndexes += tess.numIndexes;
//backEnd.pc.c_dlightIndexes += tess.numIndexes;
@ -1002,15 +940,7 @@ static void RB_FogPass( void ) {
GL_State( GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA );
}
if (tess.multiDrawPrimitives)
{
shaderCommands_t *input = &tess;
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
}
R_DrawElements(tess.numIndexes, tess.firstIndex);
}
@ -1391,14 +1321,7 @@ static void RB_IterateStagesGeneric( shaderCommands_t *input )
//
// draw
//
if (input->multiDrawPrimitives)
{
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
}
R_DrawElements(input->numIndexes, input->firstIndex);
// allow skipping out to show just lightmaps during development
if ( r_lightmap->integer && ( pStage->bundle[0].isLightmap || pStage->bundle[1].isLightmap ) )
@ -1455,14 +1378,7 @@ static void RB_RenderShadowmap( shaderCommands_t *input )
// draw
//
if (input->multiDrawPrimitives)
{
R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
}
else
{
R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
}
R_DrawElements(input->numIndexes, input->firstIndex);
}
}
}
@ -1627,7 +1543,6 @@ void RB_StageIteratorGeneric( void )
}
}
/*
** RB_EndSurface
*/
@ -1657,6 +1572,12 @@ void RB_EndSurface( void ) {
return;
}
if (tess.useCacheVao)
{
// upload indexes now
VaoCache_Commit();
}
//
// update performance counters
//
@ -1683,7 +1604,6 @@ void RB_EndSurface( void ) {
tess.numIndexes = 0;
tess.numVertexes = 0;
tess.firstIndex = 0;
tess.multiDrawPrimitives = 0;
GLimp_LogComment( "----------\n" );
}

View file

@ -366,8 +366,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
int s, t;
int firstVertex = tess.numVertexes;
//int firstIndex = tess.numIndexes;
int minIndex = tess.minIndex;
int maxIndex = tess.maxIndex;
vec4_t color;
//tess.numVertexes = 0;
@ -417,9 +415,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
}
}
tess.minIndex = firstVertex;
tess.maxIndex = tess.numVertexes;
// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
/*
@ -465,7 +460,7 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector);
}
R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex);
R_DrawElements(tess.numIndexes - tess.firstIndex, tess.firstIndex);
//qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t)));
@ -475,8 +470,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
tess.numIndexes = tess.firstIndex;
tess.numVertexes = firstVertex;
tess.firstIndex = 0;
tess.minIndex = minIndex;
tess.maxIndex = maxIndex;
}
static void DrawSkyBox( shader_t *shader )

View file

@ -68,7 +68,7 @@ void RB_CheckOverflow( int verts, int indexes ) {
void RB_CheckVao(vao_t *vao)
{
if (vao != glState.currentVao || tess.multiDrawPrimitives >= MAX_MULTIDRAW_PRIMITIVES)
if (vao != glState.currentVao)
{
RB_EndSurface();
RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
@ -208,18 +208,14 @@ void RB_InstantQuad2(vec4_t quadVerts[4], vec2_t texCoords[4])
tess.indexes[tess.numIndexes++] = 0;
tess.indexes[tess.numIndexes++] = 2;
tess.indexes[tess.numIndexes++] = 3;
tess.minIndex = 0;
tess.maxIndex = 3;
RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
R_DrawElements(tess.numIndexes, tess.firstIndex);
tess.numIndexes = 0;
tess.numVertexes = 0;
tess.firstIndex = 0;
tess.minIndex = 0;
tess.maxIndex = 0;
}
@ -410,11 +406,53 @@ static void RB_SurfaceVertsAndIndexes( int numVerts, srfVert_t *verts, int numIn
tess.numVertexes += numVerts;
}
static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int minIndex, int maxIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
/*
 * Tries to add a surface to the current tess batch through the VAO cache.
 *
 * numVerts/verts and numIndexes/indexes describe the surface geometry;
 * dlightBits and pshadowBits are OR'd into the matching tess fields.
 *
 * Returns qfalse without touching any state when the current tess shader
 * requires CPU deforms, is a sky or is a portal, or when the surface has no
 * vertices or no indexes. Otherwise the surface is handed to
 * VaoCache_AddSurface(), the tess vertex/index counts are advanced, the batch
 * is flagged as using the cache VAO rather than the internal one, and qtrue
 * is returned.
 */
static qboolean RB_SurfaceVaoCached(int numVerts, srfVert_t *verts, int numIndexes, glIndex_t *indexes, int dlightBits, int pshadowBits)
{
	qboolean recycleVertexBuffer = qfalse;
	qboolean recycleIndexBuffer = qfalse;
	qboolean endSurface = qfalse;

	// these shader kinds are never drawn from the cache
	if (ShaderRequiresCPUDeforms(tess.shader) || tess.shader->isSky || tess.shader->isPortal)
		return qfalse;

	if (!numIndexes || !numVerts)
		return qfalse;

	VaoCache_BindVao();

	tess.dlightBits |= dlightBits;
	tess.pshadowBits |= pshadowBits;

	// ask the cache whether the current batch must be ended and whether
	// either of its buffers has to be recycled before this surface is added
	VaoCache_CheckAdd(&endSurface, &recycleVertexBuffer, &recycleIndexBuffer, numVerts, numIndexes);

	if (endSurface)
	{
		RB_EndSurface();
		RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
	}

	if (recycleVertexBuffer)
		VaoCache_RecycleVertexBuffer();

	if (recycleIndexBuffer)
		VaoCache_RecycleIndexBuffer();

	// first surface of a batch: start a fresh queue
	if (!tess.numVertexes)
		VaoCache_InitQueue();

	VaoCache_AddSurface(verts, numVerts, indexes, numIndexes);

	tess.numIndexes += numIndexes;
	tess.numVertexes += numVerts;
	tess.useInternalVao = qfalse;
	tess.useCacheVao = qtrue;

	return qtrue;
}
static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
{
if (!vao)
{
return qfalse;
@ -430,98 +468,26 @@ static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firs
tess.dlightBits |= dlightBits;
tess.pshadowBits |= pshadowBits;
// merge this into any existing multidraw primitives
mergeForward = -1;
mergeBack = -1;
firstIndexOffset = BUFFER_OFFSET(firstIndex * sizeof(glIndex_t));
lastIndexOffset = BUFFER_OFFSET((firstIndex + numIndexes) * sizeof(glIndex_t));
RB_EndSurface();
RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
if (tess.multiDrawPrimitives && r_mergeMultidraws->integer)
{
i = 0;
backEnd.pc.c_staticVaoDraws++;
if (r_mergeMultidraws->integer == 1)
{
// lazy merge, only check the last primitive
i = tess.multiDrawPrimitives - 1;
}
for (; i < tess.multiDrawPrimitives; i++)
{
if (firstIndexOffset == tess.multiDrawFirstIndex[i] + tess.multiDrawNumIndexes[i])
{
mergeBack = i;
if (mergeForward != -1)
break;
}
if (lastIndexOffset == tess.multiDrawFirstIndex[i])
{
mergeForward = i;
if (mergeBack != -1)
break;
}
}
}
if (mergeBack != -1 && mergeForward == -1)
{
tess.multiDrawNumIndexes[mergeBack] += numIndexes;
tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], minIndex);
tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], maxIndex);
backEnd.pc.c_multidrawsMerged++;
}
else if (mergeBack == -1 && mergeForward != -1)
{
tess.multiDrawNumIndexes[mergeForward] += numIndexes;
tess.multiDrawFirstIndex[mergeForward] = firstIndexOffset;
tess.multiDrawMinIndex[mergeForward] = MIN(tess.multiDrawMinIndex[mergeForward], minIndex);
tess.multiDrawMaxIndex[mergeForward] = MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex);
backEnd.pc.c_multidrawsMerged++;
}
else if (mergeBack != -1 && mergeForward != -1)
{
tess.multiDrawNumIndexes[mergeBack] += numIndexes + tess.multiDrawNumIndexes[mergeForward];
tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], MIN(tess.multiDrawMinIndex[mergeForward], minIndex));
tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex));
tess.multiDrawPrimitives--;
if (mergeForward != tess.multiDrawPrimitives)
{
tess.multiDrawNumIndexes[mergeForward] = tess.multiDrawNumIndexes[tess.multiDrawPrimitives];
tess.multiDrawFirstIndex[mergeForward] = tess.multiDrawFirstIndex[tess.multiDrawPrimitives];
tess.multiDrawMinIndex[mergeForward] = tess.multiDrawMinIndex[tess.multiDrawPrimitives];
tess.multiDrawMaxIndex[mergeForward] = tess.multiDrawMaxIndex[tess.multiDrawPrimitives];
}
backEnd.pc.c_multidrawsMerged += 2;
}
else //if (mergeBack == -1 && mergeForward == -1)
{
tess.multiDrawNumIndexes[tess.multiDrawPrimitives] = numIndexes;
tess.multiDrawFirstIndex[tess.multiDrawPrimitives] = firstIndexOffset;
tess.multiDrawMinIndex[tess.multiDrawPrimitives] = minIndex;
tess.multiDrawMaxIndex[tess.multiDrawPrimitives] = maxIndex;
tess.multiDrawPrimitives++;
}
backEnd.pc.c_multidraws++;
tess.numIndexes += numIndexes;
tess.numVertexes += numVerts;
tess.numIndexes = numIndexes;
tess.numVertexes = numVerts;
return qtrue;
}
/*
=============
RB_SurfaceTriangles
=============
*/
static void RB_SurfaceTriangles( srfBspSurface_t *srf ) {
if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
srf->indexes, srf->dlightBits, srf->pshadowBits))
{
return;
}
@ -584,8 +550,6 @@ static void RB_SurfaceBeam( void )
tess.numVertexes = 0;
tess.numIndexes = 0;
tess.firstIndex = 0;
tess.minIndex = 0;
tess.maxIndex = 0;
for ( i = 0; i <= NUM_BEAM_SEGS; i++ ) {
VectorCopy(start_points[ i % NUM_BEAM_SEGS ], tess.xyz[tess.numVertexes++]);
@ -602,9 +566,6 @@ static void RB_SurfaceBeam( void )
tess.indexes[tess.numIndexes++] = 1 + (i + 1) * 2;
}
tess.minIndex = 0;
tess.maxIndex = tess.numVertexes;
// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
RB_UpdateTessVao(ATTR_POSITION);
@ -614,13 +575,11 @@ static void RB_SurfaceBeam( void )
GLSL_SetUniformVec4(sp, UNIFORM_COLOR, colorRed);
R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
R_DrawElements(tess.numIndexes, tess.firstIndex);
tess.numIndexes = 0;
tess.numVertexes = 0;
tess.firstIndex = 0;
tess.minIndex = 0;
tess.maxIndex = 0;
}
//================================================================================
@ -960,8 +919,8 @@ RB_SurfaceFace
==============
*/
static void RB_SurfaceFace( srfBspSurface_t *srf ) {
if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
srf->indexes, srf->dlightBits, srf->pshadowBits))
{
return;
}
@ -1028,8 +987,8 @@ static void RB_SurfaceGrid( srfBspSurface_t *srf ) {
int pshadowBits;
//int *vDlightBits;
if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
srf->indexes, srf->dlightBits, srf->pshadowBits))
{
return;
}
@ -1280,7 +1239,7 @@ static void RB_SurfaceFlare(srfFlare_t *surf)
// Submits a BSP surface that has its own VAO through RB_SurfaceVao.
// The shader check is disabled (shaderCheck = qfalse) and the return value is ignored.
static void RB_SurfaceVaoMesh(srfBspSurface_t * srf)
{
	RB_SurfaceVao(srf->vao, srf->numVerts, srf->numIndexes, srf->firstIndex,
			srf->dlightBits, srf->pshadowBits, qfalse);
}
void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
@ -1310,8 +1269,6 @@ void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
tess.numIndexes = surface->numIndexes;
tess.numVertexes = surface->numVerts;
tess.minIndex = surface->minIndex;
tess.maxIndex = surface->maxIndex;
//mdvModel = surface->mdvModel;
//mdvSurface = surface->mdvSurface;

View file

@ -499,6 +499,8 @@ void R_InitVaos(void)
R_BindNullVao();
VaoCache_Init();
GL_CheckErrors();
}
@ -650,3 +652,315 @@ void RB_UpdateTessVao(unsigned int attribBits)
qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, tess.numIndexes * sizeof(tess.indexes[0]), tess.indexes);
}
}
// Capacity of the pending-surface queue (vcq) that is filled between commits.
// FIXME: This sets a limit of 65536 verts/262144 indexes per static surface
// This is higher than the old vq3 limits but is worth noting
// Number of entries in vcq.surfaces
#define VAOCACHE_QUEUE_MAX_SURFACES (1 << 10)
// Number of srfVert_t elements in the vcq.vertexes staging array
#define VAOCACHE_QUEUE_MAX_VERTEXES (1 << 16)
// Number of glIndex_t elements in the vcq.indexes staging array (4 per staged vertex)
#define VAOCACHE_QUEUE_MAX_INDEXES (VAOCACHE_QUEUE_MAX_VERTEXES * 4)
// One surface waiting in the queue. Only the pointers and counts handed to
// VaoCache_AddSurface are stored; nothing is copied until VaoCache_Commit.
typedef struct queuedSurface_s
{
// source vertex array of the surface
srfVert_t *vertexes;
// number of elements in vertexes
int numVerts;
// source index array of the surface; its address is also used to recognise an already cached surface
glIndex_t *indexes;
// number of elements in indexes
int numIndexes;
}
queuedSurface_t;
// Queue of surfaces collected for the batch that the next VaoCache_Commit will process.
static struct
{
// queued surfaces, appended by VaoCache_AddSurface
queuedSurface_t surfaces[VAOCACHE_QUEUE_MAX_SURFACES];
// number of valid entries in surfaces
int numSurfaces;
// staging copy of the queued vertexes, filled by VaoCache_Commit before upload
srfVert_t vertexes[VAOCACHE_QUEUE_MAX_VERTEXES];
// size in bytes of the queued vertex data
int vertexCommitSize;
// staging copy of the rebased indexes, filled by VaoCache_Commit before upload
glIndex_t indexes[VAOCACHE_QUEUE_MAX_INDEXES];
// size in bytes of the queued index data
int indexCommitSize;
}
vcq;
// Number of entries in vc.surfaceIndexSets (cached surface index sets)
#define VAOCACHE_MAX_SURFACES (1 << 16)
// Number of entries in vc.batchLengths (cached batches)
#define VAOCACHE_MAX_BATCHES (1 << 10)
// Buffer sizes in bytes, passed to R_CreateVao by VaoCache_Init.
// srfVert_t is 60 bytes
// assuming each vert is referenced 4 times, need 16 bytes (4 glIndex_t) per vert
// -> need about 4/15ths the space for indexes as vertexes
#define VAOCACHE_VERTEX_BUFFER_SIZE (16 * 1024 * 1024)
#define VAOCACHE_INDEX_BUFFER_SIZE (5 * 1024 * 1024)
// Record of one surface's index data that has been uploaded to the cache's index buffer.
typedef struct buffered_s
{
// address of the source index array; compared by pointer in VaoCache_Commit, not by content
void *data;
// size of the index data in bytes
int size;
// byte offset of the data inside the index buffer
int bufferOffset;
}
buffered_t;
// State of the VAO cache itself: what has been uploaded so far and where the free space starts.
static struct
{
// VAO created in VaoCache_Init; owns the vertex and index buffers of the cache
vao_t *vao;
// one record per uploaded surface, stored batch after batch
buffered_t surfaceIndexSets[VAOCACHE_MAX_SURFACES];
// number of valid entries in surfaceIndexSets
int numSurfaces;
// number of surfaces in each uploaded batch
int batchLengths[VAOCACHE_MAX_BATCHES];
// number of valid entries in batchLengths
int numBatches;
// byte offset of the first unused byte in the vertex buffer
int vertexOffset;
// byte offset of the first unused byte in the index buffer
int indexOffset;
}
vc;
void VaoCache_Commit(void)
{
buffered_t *indexSet;
int *batchLength;
queuedSurface_t *surf, *end = vcq.surfaces + vcq.numSurfaces;
R_BindVao(vc.vao);
// Search for a matching batch
// FIXME: Use faster search
indexSet = vc.surfaceIndexSets;
batchLength = vc.batchLengths;
for (; batchLength < vc.batchLengths + vc.numBatches; batchLength++)
{
if (*batchLength == vcq.numSurfaces)
{
buffered_t *indexSet2 = indexSet;
for (surf = vcq.surfaces; surf < end; surf++, indexSet2++)
{
if (surf->indexes != indexSet2->data || (surf->numIndexes * sizeof(glIndex_t)) != indexSet2->size)
break;
}
if (surf == end)
break;
}
indexSet += *batchLength;
}
// If found, use it
if (indexSet < vc.surfaceIndexSets + vc.numSurfaces)
{
tess.firstIndex = indexSet->bufferOffset / sizeof(glIndex_t);
//ri.Printf(PRINT_ALL, "firstIndex %d numIndexes %d as %d\n", tess.firstIndex, tess.numIndexes, batchLength - vc.batchLengths);
//ri.Printf(PRINT_ALL, "vc.numSurfaces %d vc.numBatches %d\n", vc.numSurfaces, vc.numBatches);
}
// If not, rebuffer the batch
// FIXME: keep track of the vertexes so we don't have to reupload them every time
else
{
srfVert_t *dstVertex = vcq.vertexes;
glIndex_t *dstIndex = vcq.indexes;
batchLength = vc.batchLengths + vc.numBatches;
*batchLength = vcq.numSurfaces;
vc.numBatches++;
tess.firstIndex = vc.indexOffset / sizeof(glIndex_t);
vcq.vertexCommitSize = 0;
vcq.indexCommitSize = 0;
for (surf = vcq.surfaces; surf < end; surf++)
{
glIndex_t *srcIndex = surf->indexes;
int vertexesSize = surf->numVerts * sizeof(srfVert_t);
int indexesSize = surf->numIndexes * sizeof(glIndex_t);
int i, indexOffset = (vc.vertexOffset + vcq.vertexCommitSize) / sizeof(srfVert_t);
Com_Memcpy(dstVertex, surf->vertexes, vertexesSize);
dstVertex += surf->numVerts;
vcq.vertexCommitSize += vertexesSize;
indexSet = vc.surfaceIndexSets + vc.numSurfaces;
indexSet->data = surf->indexes;
indexSet->size = indexesSize;
indexSet->bufferOffset = vc.indexOffset + vcq.indexCommitSize;
vc.numSurfaces++;
for (i = 0; i < surf->numIndexes; i++)
*dstIndex++ = *srcIndex++ + indexOffset;
vcq.indexCommitSize += indexesSize;
}
//ri.Printf(PRINT_ALL, "committing %d to %d, %d to %d as %d\n", vcq.vertexCommitSize, vc.vertexOffset, vcq.indexCommitSize, vc.indexOffset, batchLength - vc.batchLengths);
if (vcq.vertexCommitSize)
{
qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
qglBufferSubData(GL_ARRAY_BUFFER, vc.vertexOffset, vcq.vertexCommitSize, vcq.vertexes);
vc.vertexOffset += vcq.vertexCommitSize;
}
if (vcq.indexCommitSize)
{
qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, vc.indexOffset, vcq.indexCommitSize, vcq.indexes);
vc.indexOffset += vcq.indexCommitSize;
}
}
}
/*
=============
VaoCache_Init

Creates the cache VAO with empty, dynamically used vertex and index
buffers, describes the vertex attributes stored in it and resets all
cache and queue bookkeeping.

Attribute offsets are the running sum of the member sizes in the order
listed below, and the stride is the total of those sizes.
NOTE(review): this assumes srfVert_t stores exactly these members, in this
order and without padding - confirm against the srfVert_t definition.
=============
*/
void VaoCache_Init(void)
{
	srfVert_t vert;	// only used as an operand of sizeof
	int offset = 0;
	int a;
	const struct
	{
		int attrib;
		int count;
		int type;
		int normalized;
		int size;
	}
	layout[] =
	{
		{ ATTR_INDEX_POSITION,       3, GL_FLOAT,          GL_FALSE, (int)sizeof(vert.xyz)      },
		{ ATTR_INDEX_TEXCOORD,       2, GL_FLOAT,          GL_FALSE, (int)sizeof(vert.st)       },
		{ ATTR_INDEX_LIGHTCOORD,     2, GL_FLOAT,          GL_FALSE, (int)sizeof(vert.lightmap) },
		{ ATTR_INDEX_NORMAL,         4, GL_SHORT,          GL_TRUE,  (int)sizeof(vert.normal)   },
		{ ATTR_INDEX_TANGENT,        4, GL_SHORT,          GL_TRUE,  (int)sizeof(vert.tangent)  },
		{ ATTR_INDEX_LIGHTDIRECTION, 4, GL_SHORT,          GL_TRUE,  (int)sizeof(vert.lightdir) },
		{ ATTR_INDEX_COLOR,          4, GL_UNSIGNED_SHORT, GL_TRUE,  (int)sizeof(vert.color)    },
	};
	const int numAttribs = (int)(sizeof(layout) / sizeof(layout[0]));

	vc.vao = R_CreateVao("VaoCache", NULL, VAOCACHE_VERTEX_BUFFER_SIZE, NULL, VAOCACHE_INDEX_BUFFER_SIZE, VAO_USAGE_DYNAMIC);

	// per-attribute format; offsets accumulate in table order
	for (a = 0; a < numAttribs; a++)
	{
		const int attrib = layout[a].attrib;

		vc.vao->attribs[attrib].enabled = 1;
		vc.vao->attribs[attrib].count = layout[a].count;
		vc.vao->attribs[attrib].type = layout[a].type;
		vc.vao->attribs[attrib].normalized = layout[a].normalized;
		vc.vao->attribs[attrib].offset = offset;

		offset += layout[a].size;
	}

	// every attribute shares one stride: the summed size of all members
	for (a = 0; a < numAttribs; a++)
		vc.vao->attribs[layout[a].attrib].stride = offset;

	Vao_SetVertexPointers(vc.vao);

	// nothing is cached yet
	vc.numSurfaces = 0;
	vc.numBatches = 0;
	vc.vertexOffset = 0;
	vc.indexOffset = 0;

	// nothing is queued yet
	vcq.vertexCommitSize = 0;
	vcq.indexCommitSize = 0;
	vcq.numSurfaces = 0;
}
// Binds the cache VAO (vc.vao) by passing it to R_BindVao.
void VaoCache_BindVao(void)
{
R_BindVao(vc.vao);
}
/*
=============
VaoCache_CheckAdd

Checks whether a surface with numVerts vertexes and numIndexes indexes
can be added to the queue and, later, to the cache buffers.

The three output flags are only ever set to qtrue and never cleared, so
the caller must initialise them to qfalse:
  endSurface          - the current batch has to be ended before adding
  recycleVertexBuffer - the vertex buffer has no room left
  recycleIndexBuffer  - the index buffer or its bookkeeping has no room left

Nothing in the cache or the queue is modified here.
=============
*/
void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes)
{
	int vertexesSize = sizeof(srfVert_t) * numVerts;
	int indexesSize = sizeof(glIndex_t) * numIndexes;

	// Vertex buffer exhausted. Cached indexes are rebased onto vertex buffer
	// positions, so they have to be dropped together with the vertexes.
	if (vc.vertexOffset + vcq.vertexCommitSize + vertexesSize > vc.vao->vertexesSize)
	{
		*recycleVertexBuffer = qtrue;
		*recycleIndexBuffer = qtrue;
		*endSurface = qtrue;
	}

	// Index side exhausted: buffer space, surface records or batch records.
	if (vc.indexOffset + vcq.indexCommitSize + indexesSize > vc.vao->indexesSize
		|| vc.numSurfaces + vcq.numSurfaces >= VAOCACHE_MAX_SURFACES
		|| vc.numBatches >= VAOCACHE_MAX_BATCHES)
	{
		*recycleIndexBuffer = qtrue;
		*endSurface = qtrue;
	}

	// Queue exhausted: surface entries, staged vertexes or staged indexes.
	// Ending the batch is enough, the buffers themselves still have room.
	if (vcq.numSurfaces >= VAOCACHE_QUEUE_MAX_SURFACES
		|| vcq.vertexCommitSize + vertexesSize > VAOCACHE_QUEUE_MAX_VERTEXES * sizeof(srfVert_t)
		|| vcq.indexCommitSize + indexesSize > VAOCACHE_QUEUE_MAX_INDEXES * sizeof(glIndex_t))
	{
		*endSurface = qtrue;
	}
}
/*
=============
VaoCache_RecycleVertexBuffer

Discards the cached vertexes: writing restarts at offset 0 and the vertex
buffer's data store is re-specified at its full size with no initial data.
=============
*/
void VaoCache_RecycleVertexBuffer(void)
{
	vc.vertexOffset = 0;

	qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
	qglBufferData(GL_ARRAY_BUFFER, vc.vao->vertexesSize, NULL, GL_DYNAMIC_DRAW);
}
/*
=============
VaoCache_RecycleIndexBuffer

Discards the cached indexes: forgets every cached surface and batch,
restarts writing at offset 0 and re-specifies the index buffer's data
store at its full size with no initial data.
=============
*/
void VaoCache_RecycleIndexBuffer(void)
{
	vc.numBatches = 0;
	vc.numSurfaces = 0;
	vc.indexOffset = 0;

	qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
	qglBufferData(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesSize, NULL, GL_DYNAMIC_DRAW);
}
/*
=============
VaoCache_InitQueue

Empties the surface queue: no surfaces queued, zero bytes of vertex and
index data pending.
=============
*/
void VaoCache_InitQueue(void)
{
	vcq.numSurfaces = vcq.indexCommitSize = vcq.vertexCommitSize = 0;
}
/*
=============
VaoCache_AddSurface

Appends a surface to the queue and adds the byte sizes of its vertexes
and indexes to the pending commit sizes. Only the pointers are stored;
the data itself is not copied here.

No capacity check is done; VaoCache_CheckAdd reports whether there is room.
=============
*/
void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes)
{
	queuedSurface_t *slot = &vcq.surfaces[vcq.numSurfaces++];

	slot->vertexes = verts;
	slot->numVerts = numVerts;
	slot->indexes = indexes;
	slot->numIndexes = numIndexes;

	vcq.vertexCommitSize += sizeof(srfVert_t) * numVerts;
	vcq.indexCommitSize += sizeof(glIndex_t) * numIndexes;
}

View file

@ -36,7 +36,7 @@ static qboolean R_CullSurface( msurface_t *surf ) {
return qfalse;
}
if ( *surf->data == SF_GRID && r_nocurves->integer ) {
if ( r_nocurves->integer && *surf->data == SF_GRID ) {
return qtrue;
}
@ -561,43 +561,23 @@ static void R_RecursiveWorldNode( mnode_t *node, uint32_t planeBits, uint32_t dl
tr.viewParms.visBounds[1][2] = node->maxs[2];
}
// add merged and unmerged surfaces
if (tr.world->viewSurfaces && !r_nocurves->integer)
view = tr.world->viewSurfaces + node->firstmarksurface;
else
view = tr.world->marksurfaces + node->firstmarksurface;
// add surfaces
view = tr.world->marksurfaces + node->firstmarksurface;
c = node->nummarksurfaces;
while (c--) {
// just mark it as visible, so we don't jump out of the cache dereferencing the surface
surf = *view;
if (surf < 0)
if (tr.world->surfacesViewCount[surf] != tr.viewCount)
{
if (tr.world->mergedSurfacesViewCount[-surf - 1] != tr.viewCount)
{
tr.world->mergedSurfacesViewCount[-surf - 1] = tr.viewCount;
tr.world->mergedSurfacesDlightBits[-surf - 1] = dlightBits;
tr.world->mergedSurfacesPshadowBits[-surf - 1] = pshadowBits;
}
else
{
tr.world->mergedSurfacesDlightBits[-surf - 1] |= dlightBits;
tr.world->mergedSurfacesPshadowBits[-surf - 1] |= pshadowBits;
}
tr.world->surfacesViewCount[surf] = tr.viewCount;
tr.world->surfacesDlightBits[surf] = dlightBits;
tr.world->surfacesPshadowBits[surf] = pshadowBits;
}
else
{
if (tr.world->surfacesViewCount[surf] != tr.viewCount)
{
tr.world->surfacesViewCount[surf] = tr.viewCount;
tr.world->surfacesDlightBits[surf] = dlightBits;
tr.world->surfacesPshadowBits[surf] = pshadowBits;
}
else
{
tr.world->surfacesDlightBits[surf] |= dlightBits;
tr.world->surfacesPshadowBits[surf] |= pshadowBits;
}
tr.world->surfacesDlightBits[surf] |= dlightBits;
tr.world->surfacesPshadowBits[surf] |= pshadowBits;
}
view++;
}
@ -825,14 +805,6 @@ void R_AddWorldSurfaces (void) {
R_AddWorldSurface( tr.world->surfaces + i, tr.world->surfacesDlightBits[i], tr.world->surfacesPshadowBits[i] );
tr.refdef.dlightMask |= tr.world->surfacesDlightBits[i];
}
for (i = 0; i < tr.world->numMergedSurfaces; i++)
{
if (tr.world->mergedSurfacesViewCount[i] != tr.viewCount)
continue;
R_AddWorldSurface( tr.world->mergedSurfaces + i, tr.world->mergedSurfacesDlightBits[i], tr.world->mergedSurfacesPshadowBits[i] );
tr.refdef.dlightMask |= tr.world->mergedSurfacesDlightBits[i];
}
tr.refdef.dlightMask = ~tr.refdef.dlightMask;
}