From c65d2c2657fa6aecf498a5e1c20ffa706a80e818 Mon Sep 17 00:00:00 2001 From: SmileTheory Date: Fri, 28 Apr 2017 02:13:25 -0700 Subject: [PATCH] Add vao cache for static surfaces. Remove support for draw range elements, multi draw arrays, world vao creation, surface merging. --- code/renderercommon/qgl.h | 10 - code/renderergl2/tr_bsp.c | 425 ------------------------------- code/renderergl2/tr_cmds.c | 4 +- code/renderergl2/tr_extensions.c | 12 - code/renderergl2/tr_init.c | 4 - code/renderergl2/tr_local.h | 39 +-- code/renderergl2/tr_model.c | 3 - code/renderergl2/tr_shade.c | 112 ++------ code/renderergl2/tr_sky.c | 9 +- code/renderergl2/tr_surface.c | 165 +++++------- code/renderergl2/tr_vbo.c | 314 +++++++++++++++++++++++ code/renderergl2/tr_world.c | 46 +--- 12 files changed, 413 insertions(+), 730 deletions(-) diff --git a/code/renderercommon/qgl.h b/code/renderercommon/qgl.h index 8b7367af..3aaa0e8b 100644 --- a/code/renderercommon/qgl.h +++ b/code/renderercommon/qgl.h @@ -379,19 +379,11 @@ extern void (APIENTRYP qglUnlockArraysEXT) (void); // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a -// OpenGL 1.2, was GL_EXT_draw_range_elements -#define QGL_1_2_PROCS \ - GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \ - // OpenGL 1.3, was GL_ARB_texture_compression #define QGL_1_3_PROCS \ GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \ GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \ -// OpenGL 1.4, was GL_EXT_multi_draw_arrays -#define QGL_1_4_PROCS \ - GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \ - // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query #define QGL_1_5_PROCS \ GLE(void, GenQueries, GLsizei n, GLuint *ids) \ @@ -640,9 +632,7 @@ extern void (APIENTRYP qglUnlockArraysEXT) (void); GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \ #define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name; -QGL_1_2_PROCS; QGL_1_3_PROCS; -QGL_1_4_PROCS; QGL_1_5_PROCS; QGL_2_0_PROCS; QGL_EXT_framebuffer_object_PROCS; diff --git a/code/renderergl2/tr_bsp.c b/code/renderergl2/tr_bsp.c index cf6b5734..0480f91e 100644 --- a/code/renderergl2/tr_bsp.c +++ b/code/renderergl2/tr_bsp.c @@ -1680,428 +1680,6 @@ void R_MovePatchSurfacesToHunk(void) { } -/* -================= -BSPSurfaceCompare -compare function for qsort() -================= -*/ -static int BSPSurfaceCompare(const void *a, const void *b) -{ - msurface_t *aa, *bb; - - aa = *(msurface_t **) a; - bb = *(msurface_t **) b; - - // shader first - if(aa->shader->sortedIndex < bb->shader->sortedIndex) - return -1; - - else if(aa->shader->sortedIndex > bb->shader->sortedIndex) - return 1; - - // by fogIndex - if(aa->fogIndex < bb->fogIndex) - return -1; - - else if(aa->fogIndex > bb->fogIndex) - return 1; - - // by cubemapIndex - if(aa->cubemapIndex < bb->cubemapIndex) - return -1; - - else if(aa->cubemapIndex > bb->cubemapIndex) - return 1; - - // by leaf - if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] < s_worldData.surfacesViewCount[bb - s_worldData.surfaces]) - return -1; - - else if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] > s_worldData.surfacesViewCount[bb - s_worldData.surfaces]) - return 1; - - // by surface number - if (aa < bb) - return -1; - - else if (aa > bb) - return 1; - - return 0; -} - - -static void CopyVert(const srfVert_t * in, srfVert_t * out) -{ - VectorCopy(in->xyz, out->xyz); - VectorCopy4(in->tangent, out->tangent); - VectorCopy4(in->normal, out->normal); - VectorCopy4(in->lightdir, out->lightdir); - - VectorCopy2(in->st, out->st); - VectorCopy2(in->lightmap, out->lightmap); - - VectorCopy4(in->color, out->color); -} - - -/* -=============== -R_CreateWorldVaos -=============== -*/ -static void R_CreateWorldVaos(void) -{ - int i, j, k; - - int numVerts; - srfVert_t *verts; - - int numIndexes; - glIndex_t *indexes; - - int numSortedSurfaces, numSurfaces; - msurface_t *surface, **firstSurf, **lastSurf, **currSurf; - msurface_t **surfacesSorted; - - vao_t *vao; - - int maxVboSize = 4 * 1024 * 1024; - - int startTime, endTime; - - startTime = ri.Milliseconds(); - - // mark surfaces with best matching leaf, using overlapping bounds - // using surfaceViewCount[] as leaf number, and surfacesDlightBits[] as coverage * 256 - for (i = 0; i < s_worldData.numWorldSurfaces; i++) - { - s_worldData.surfacesViewCount[i] = -1; - } - - for (i = 0; i < s_worldData.numWorldSurfaces; i++) - { - s_worldData.surfacesDlightBits[i] = 0; - } - - for (i = s_worldData.numDecisionNodes; i < s_worldData.numnodes; i++) - { - mnode_t *leaf = s_worldData.nodes + i; - - for (j = leaf->firstmarksurface; j < leaf->firstmarksurface + leaf->nummarksurfaces; j++) - { - int surfaceNum = s_worldData.marksurfaces[j]; - msurface_t *surface = s_worldData.surfaces + surfaceNum; - float coverage = 1.0f; - int iCoverage; - - for (k = 0; k < 3; k++) - { - float left, right; - - if (leaf->mins[k] > surface->cullinfo.bounds[1][k] || surface->cullinfo.bounds[0][k] > leaf->maxs[k]) - { - coverage = 0.0f; - break; - } - - left = MAX(leaf->mins[k], surface->cullinfo.bounds[0][k]); - right = MIN(leaf->maxs[k], surface->cullinfo.bounds[1][k]); - - // nudge a bit in case this is an axis aligned wall - coverage *= right - left + 1.0f/256.0f; - } - - iCoverage = coverage * 256; - - if (iCoverage > s_worldData.surfacesDlightBits[surfaceNum]) - { - s_worldData.surfacesDlightBits[surfaceNum] = iCoverage; - s_worldData.surfacesViewCount[surfaceNum] = i - s_worldData.numDecisionNodes; - } - } - } - - for (i = 0; i < s_worldData.numWorldSurfaces; i++) - { - s_worldData.surfacesDlightBits[i] = 0; - } - - // count surfaces - numSortedSurfaces = 0; - for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++) - { - srfBspSurface_t *bspSurf; - shader_t *shader = surface->shader; - - if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader)) - continue; - - // check for this now so we can use srfBspSurface_t* universally in the rest of the function - if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES)) - continue; - - bspSurf = (srfBspSurface_t *) surface->data; - - if (!bspSurf->numIndexes || !bspSurf->numVerts) - continue; - - numSortedSurfaces++; - } - - // presort surfaces - surfacesSorted = ri.Malloc(numSortedSurfaces * sizeof(*surfacesSorted)); - - j = 0; - for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++) - { - srfBspSurface_t *bspSurf; - shader_t *shader = surface->shader; - - if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader)) - continue; - - // check for this now so we can use srfBspSurface_t* universally in the rest of the function - if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES)) - continue; - - bspSurf = (srfBspSurface_t *) surface->data; - - if (!bspSurf->numIndexes || !bspSurf->numVerts) - continue; - - surfacesSorted[j++] = surface; - } - - qsort(surfacesSorted, numSortedSurfaces, sizeof(*surfacesSorted), BSPSurfaceCompare); - - k = 0; - for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf) - { - int currVboSize; - - // Find range of surfaces to place in a VAO by: - // - Collecting a number of surfaces which fit under maxVboSize, or - // - All the surfaces with a single shader which go over maxVboSize - currVboSize = 0; - while (currVboSize < maxVboSize && lastSurf < surfacesSorted + numSortedSurfaces) - { - int addVboSize, currShaderIndex; - - addVboSize = 0; - currShaderIndex = (*lastSurf)->shader->sortedIndex; - - for(currSurf = lastSurf; currSurf < surfacesSorted + numSortedSurfaces && (*currSurf)->shader->sortedIndex == currShaderIndex; currSurf++) - { - srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data; - - addVboSize += bspSurf->numVerts * sizeof(srfVert_t); - } - - if (currVboSize != 0 && addVboSize + currVboSize > maxVboSize) - break; - - lastSurf = currSurf; - - currVboSize += addVboSize; - } - - // count verts/indexes/surfaces - numVerts = 0; - numIndexes = 0; - numSurfaces = 0; - for (currSurf = firstSurf; currSurf < lastSurf; currSurf++) - { - srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data; - - numVerts += bspSurf->numVerts; - numIndexes += bspSurf->numIndexes; - numSurfaces++; - } - - ri.Printf(PRINT_ALL, "...calculating world VAO %d ( %i verts %i tris )\n", k, numVerts, numIndexes / 3); - - // create arrays - verts = ri.Hunk_AllocateTempMemory(numVerts * sizeof(srfVert_t)); - indexes = ri.Hunk_AllocateTempMemory(numIndexes * sizeof(glIndex_t)); - - // set up indices and copy vertices - numVerts = 0; - numIndexes = 0; - for (currSurf = firstSurf; currSurf < lastSurf; currSurf++) - { - srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data; - glIndex_t *surfIndex; - - bspSurf->firstIndex = numIndexes; - bspSurf->minIndex = numVerts + bspSurf->indexes[0]; - bspSurf->maxIndex = numVerts + bspSurf->indexes[0]; - - for(i = 0, surfIndex = bspSurf->indexes; i < bspSurf->numIndexes; i++, surfIndex++) - { - indexes[numIndexes++] = numVerts + *surfIndex; - bspSurf->minIndex = MIN(bspSurf->minIndex, numVerts + *surfIndex); - bspSurf->maxIndex = MAX(bspSurf->maxIndex, numVerts + *surfIndex); - } - - bspSurf->firstVert = numVerts; - - for(i = 0; i < bspSurf->numVerts; i++) - { - CopyVert(&bspSurf->verts[i], &verts[numVerts++]); - } - } - - vao = R_CreateVao2(va("staticBspModel%i_VAO", k), numVerts, verts, numIndexes, indexes); - - // point bsp surfaces to VAO - for (currSurf = firstSurf; currSurf < lastSurf; currSurf++) - { - srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data; - - bspSurf->vao = vao; - } - - ri.Hunk_FreeTempMemory(indexes); - ri.Hunk_FreeTempMemory(verts); - - k++; - } - - if (r_mergeLeafSurfaces->integer) - { - msurface_t *mergedSurf; - - // count merged surfaces - int numMergedSurfaces = 0, numUnmergedSurfaces = 0; - for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf) - { - for (lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++) - { - int lastSurfLeafIndex, firstSurfLeafIndex; - - if ((*lastSurf)->shader != (*firstSurf)->shader - || (*lastSurf)->fogIndex != (*firstSurf)->fogIndex - || (*lastSurf)->cubemapIndex != (*firstSurf)->cubemapIndex) - break; - - lastSurfLeafIndex = s_worldData.surfacesViewCount[*lastSurf - s_worldData.surfaces]; - firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces]; - - if (lastSurfLeafIndex != firstSurfLeafIndex) - break; - } - - // don't merge single surfaces - if (firstSurf + 1 == lastSurf) - { - numUnmergedSurfaces++; - continue; - } - - numMergedSurfaces++; - } - - // Allocate merged surfaces - s_worldData.mergedSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfaces) * numMergedSurfaces, h_low); - s_worldData.mergedSurfacesViewCount = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesViewCount) * numMergedSurfaces, h_low); - s_worldData.mergedSurfacesDlightBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesDlightBits) * numMergedSurfaces, h_low); - s_worldData.mergedSurfacesPshadowBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesPshadowBits) * numMergedSurfaces, h_low); - s_worldData.numMergedSurfaces = numMergedSurfaces; - - // view surfaces are like mark surfaces, except negative ones represent merged surfaces - // -1 represents 0, -2 represents 1, and so on - s_worldData.viewSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.viewSurfaces) * s_worldData.nummarksurfaces, h_low); - - // actually merge surfaces - mergedSurf = s_worldData.mergedSurfaces; - for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf) - { - srfBspSurface_t *bspSurf, *vaoSurf; - - for ( lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++) - { - int lastSurfLeafIndex, firstSurfLeafIndex; - - if ((*lastSurf)->shader != (*firstSurf)->shader - || (*lastSurf)->fogIndex != (*firstSurf)->fogIndex - || (*lastSurf)->cubemapIndex != (*firstSurf)->cubemapIndex) - break; - - lastSurfLeafIndex = s_worldData.surfacesViewCount[*lastSurf - s_worldData.surfaces]; - firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces]; - - if (lastSurfLeafIndex != firstSurfLeafIndex) - break; - } - - // don't merge single surfaces - if (firstSurf + 1 == lastSurf) - continue; - - bspSurf = (srfBspSurface_t *)(*firstSurf)->data; - - vaoSurf = ri.Hunk_Alloc(sizeof(*vaoSurf), h_low); - memset(vaoSurf, 0, sizeof(*vaoSurf)); - vaoSurf->surfaceType = SF_VAO_MESH; - - vaoSurf->vao = bspSurf->vao; - - vaoSurf->firstIndex = bspSurf->firstIndex; - vaoSurf->minIndex = bspSurf->minIndex; - vaoSurf->maxIndex = bspSurf->maxIndex; - - ClearBounds(vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]); - for (currSurf = firstSurf; currSurf < lastSurf; currSurf++) - { - srfBspSurface_t *currBspSurf = (srfBspSurface_t *)(*currSurf)->data; - - vaoSurf->numVerts += currBspSurf->numVerts; - vaoSurf->numIndexes += currBspSurf->numIndexes; - vaoSurf->minIndex = MIN(vaoSurf->minIndex, currBspSurf->minIndex); - vaoSurf->maxIndex = MAX(vaoSurf->maxIndex, currBspSurf->maxIndex); - AddPointToBounds((*currSurf)->cullinfo.bounds[0], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]); - AddPointToBounds((*currSurf)->cullinfo.bounds[1], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]); - } - - VectorCopy(vaoSurf->cullBounds[0], mergedSurf->cullinfo.bounds[0]); - VectorCopy(vaoSurf->cullBounds[1], mergedSurf->cullinfo.bounds[1]); - - mergedSurf->cullinfo.type = CULLINFO_BOX; - mergedSurf->data = (surfaceType_t *)vaoSurf; - mergedSurf->fogIndex = (*firstSurf)->fogIndex; - mergedSurf->cubemapIndex = (*firstSurf)->cubemapIndex; - mergedSurf->shader = (*firstSurf)->shader; - - // change surfacesViewCount[] from leaf index to viewSurface index - 1 so we can redirect later - // subtracting 2 (viewSurface index - 1) to avoid collision with -1 (no leaf) - for (currSurf = firstSurf; currSurf < lastSurf; currSurf++) - s_worldData.surfacesViewCount[*currSurf - s_worldData.surfaces] = -((int)(mergedSurf - s_worldData.mergedSurfaces)) - 2; - - mergedSurf++; - } - - // direct viewSurfaces to merged and unmerged surfaces - for (i = 0; i < s_worldData.nummarksurfaces; i++) - { - int viewSurfaceIndex = s_worldData.surfacesViewCount[s_worldData.marksurfaces[i]] + 1; - s_worldData.viewSurfaces[i] = (viewSurfaceIndex < 0) ? viewSurfaceIndex : s_worldData.marksurfaces[i]; - } - - ri.Printf(PRINT_ALL, "Processed %d mergeable surfaces into %d merged, %d unmerged\n", - numSortedSurfaces, numMergedSurfaces, numUnmergedSurfaces); - } - - for (i = 0; i < s_worldData.numWorldSurfaces; i++) - s_worldData.surfacesViewCount[i] = -1; - - ri.Free(surfacesSorted); - - endTime = ri.Milliseconds(); - ri.Printf(PRINT_ALL, "world VAOs calculation time = %5.2f seconds\n", (endTime - startTime) / 1000.0); -} - /* =============== R_LoadSurfaces @@ -3420,9 +2998,6 @@ void RE_LoadWorldMap( const char *name ) { } } - // create static VAOS from the world - R_CreateWorldVaos(); - s_worldData.dataSize = (byte *)ri.Hunk_Alloc(0, h_low) - startMarker; // only set tr.world now that we know the entire level has loaded properly diff --git a/code/renderergl2/tr_cmds.c b/code/renderergl2/tr_cmds.c index 254bb0d7..2aefb4c9 100644 --- a/code/renderergl2/tr_cmds.c +++ b/code/renderergl2/tr_cmds.c @@ -66,8 +66,8 @@ void R_PerformanceCounters( void ) { } else if (r_speeds->integer == 7 ) { - ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\nMultidraws: %i merged %i\n", - backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws, backEnd.pc.c_multidraws, backEnd.pc.c_multidrawsMerged ); + ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\n", + backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws); ri.Printf( PRINT_ALL, "GLSL binds: %i draws: gen %i light %i fog %i dlight %i\n", backEnd.pc.c_glslShaderBinds, backEnd.pc.c_genericDraws, backEnd.pc.c_lightallDraws, backEnd.pc.c_fogDraws, backEnd.pc.c_dlightDraws); } diff --git a/code/renderergl2/tr_extensions.c b/code/renderergl2/tr_extensions.c index f0b24b78..37bbe849 100644 --- a/code/renderergl2/tr_extensions.c +++ b/code/renderergl2/tr_extensions.c @@ -31,9 +31,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include "tr_dsa.h" #define GLE(ret, name, ...) name##proc * qgl##name; -QGL_1_2_PROCS; QGL_1_3_PROCS; -QGL_1_4_PROCS; QGL_1_5_PROCS; QGL_2_0_PROCS; QGL_EXT_framebuffer_object_PROCS; @@ -67,19 +65,9 @@ void GLimp_InitExtraExtensions() // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a #define GLE(ret, name, ...) qgl##name = (name##proc *) SDL_GL_GetProcAddress("gl" #name); - // OpenGL 1.2, was GL_EXT_draw_range_elements - QGL_1_2_PROCS; - glRefConfig.drawRangeElements = !!r_ext_draw_range_elements->integer; - ri.Printf(PRINT_ALL, result[glRefConfig.drawRangeElements], "glDrawRangeElements()"); - // OpenGL 1.3, was GL_ARB_texture_compression QGL_1_3_PROCS; - // OpenGL 1.4, was GL_EXT_multi_draw_arrays - QGL_1_4_PROCS; - glRefConfig.multiDrawArrays = !!r_ext_multi_draw_arrays->integer; - ri.Printf(PRINT_ALL, result[glRefConfig.multiDrawArrays], "glMultiDrawElements()"); - // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query QGL_1_5_PROCS; glRefConfig.occlusionQuery = qtrue; diff --git a/code/renderergl2/tr_init.c b/code/renderergl2/tr_init.c index 2e65a52e..3f364960 100644 --- a/code/renderergl2/tr_init.c +++ b/code/renderergl2/tr_init.c @@ -98,8 +98,6 @@ cvar_t *r_ext_texture_env_add; cvar_t *r_ext_texture_filter_anisotropic; cvar_t *r_ext_max_anisotropy; -cvar_t *r_ext_draw_range_elements; -cvar_t *r_ext_multi_draw_arrays; cvar_t *r_ext_framebuffer_object; cvar_t *r_ext_texture_float; cvar_t *r_ext_framebuffer_multisample; @@ -1158,8 +1156,6 @@ void R_Register( void ) r_ext_compiled_vertex_array = ri.Cvar_Get( "r_ext_compiled_vertex_array", "1", CVAR_ARCHIVE | CVAR_LATCH); r_ext_texture_env_add = ri.Cvar_Get( "r_ext_texture_env_add", "1", CVAR_ARCHIVE | CVAR_LATCH); - r_ext_draw_range_elements = ri.Cvar_Get( "r_ext_draw_range_elements", "1", CVAR_ARCHIVE | CVAR_LATCH); - r_ext_multi_draw_arrays = ri.Cvar_Get( "r_ext_multi_draw_arrays", "1", CVAR_ARCHIVE | CVAR_LATCH); r_ext_framebuffer_object = ri.Cvar_Get( "r_ext_framebuffer_object", "1", CVAR_ARCHIVE | CVAR_LATCH); r_ext_texture_float = ri.Cvar_Get( "r_ext_texture_float", "1", CVAR_ARCHIVE | CVAR_LATCH); r_ext_framebuffer_multisample = ri.Cvar_Get( "r_ext_framebuffer_multisample", "0", CVAR_ARCHIVE | CVAR_LATCH); diff --git a/code/renderergl2/tr_local.h b/code/renderergl2/tr_local.h index e6781b73..b8f34df5 100644 --- a/code/renderergl2/tr_local.h +++ b/code/renderergl2/tr_local.h @@ -929,8 +929,6 @@ typedef struct srfBspSurface_s // BSP VBO offsets int firstVert; int firstIndex; - glIndex_t minIndex; - glIndex_t maxIndex; // static render data vao_t *vao; @@ -1005,8 +1003,6 @@ typedef struct srfVaoMdvMesh_s // backEnd stats int numIndexes; int numVerts; - glIndex_t minIndex; - glIndex_t maxIndex; // static render data vao_t *vao; @@ -1135,15 +1131,8 @@ typedef struct { int *surfacesDlightBits; int *surfacesPshadowBits; - int numMergedSurfaces; - msurface_t *mergedSurfaces; - int *mergedSurfacesViewCount; - int *mergedSurfacesDlightBits; - int *mergedSurfacesPshadowBits; - int nummarksurfaces; int *marksurfaces; - int *viewSurfaces; int numfogs; fog_t *fogs; @@ -1375,8 +1364,6 @@ typedef struct { qboolean intelGraphics; - qboolean drawRangeElements; - qboolean multiDrawArrays; qboolean occlusionQuery; int glslMajorVersion; @@ -1415,9 +1402,6 @@ typedef struct { int c_staticVaoDraws; int c_dynamicVaoDraws; - int c_multidraws; - int c_multidrawsMerged; - int c_dlightVertexes; int c_dlightIndexes; @@ -1689,8 +1673,6 @@ extern cvar_t *r_showcluster; extern cvar_t *r_gamma; extern cvar_t *r_displayRefresh; // optional display refresh option -extern cvar_t *r_ext_draw_range_elements; -extern cvar_t *r_ext_multi_draw_arrays; extern cvar_t *r_ext_framebuffer_object; extern cvar_t *r_ext_texture_float; extern cvar_t *r_ext_framebuffer_multisample; @@ -1977,8 +1959,6 @@ typedef struct stageVars vec2_t texcoords[NUM_TEXTURE_BUNDLES][SHADER_MAX_VERTEXES]; } stageVars_t; -#define MAX_MULTIDRAW_PRIMITIVES 256 - typedef struct shaderCommands_s { glIndex_t indexes[SHADER_MAX_INDEXES] QALIGN(16); @@ -1994,6 +1974,7 @@ typedef struct shaderCommands_s void *attribPointers[ATTR_INDEX_COUNT]; vao_t *vao; qboolean useInternalVao; + qboolean useCacheVao; stageVars_t svars QALIGN(16); @@ -2010,14 +1991,6 @@ typedef struct shaderCommands_s int firstIndex; int numIndexes; int numVertexes; - glIndex_t minIndex; - glIndex_t maxIndex; - - int multiDrawPrimitives; - GLsizei multiDrawNumIndexes[MAX_MULTIDRAW_PRIMITIVES]; - glIndex_t *multiDrawFirstIndex[MAX_MULTIDRAW_PRIMITIVES]; - glIndex_t multiDrawMinIndex[MAX_MULTIDRAW_PRIMITIVES]; - glIndex_t multiDrawMaxIndex[MAX_MULTIDRAW_PRIMITIVES]; // info extracted from current shader int numPasses; @@ -2032,7 +2005,7 @@ void RB_EndSurface(void); void RB_CheckOverflow( int verts, int indexes ); #define RB_CHECKOVERFLOW(v,i) if (tess.numVertexes + (v) >= SHADER_MAX_VERTEXES || tess.numIndexes + (i) >= SHADER_MAX_INDEXES ) {RB_CheckOverflow(v,i);} -void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex ); +void R_DrawElements( int numIndexes, glIndex_t firstIndex ); void RB_StageIteratorGeneric( void ); void RB_StageIteratorSky( void ); void RB_StageIteratorVertexLitTexture( void ); @@ -2171,6 +2144,14 @@ void R_VaoList_f(void); void RB_UpdateTessVao(unsigned int attribBits); +void VaoCache_Commit(void); +void VaoCache_Init(void); +void VaoCache_BindVao(void); +void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes); +void VaoCache_RecycleVertexBuffer(void); +void VaoCache_RecycleIndexBuffer(void); +void VaoCache_InitQueue(void); +void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes); /* ============================================================ diff --git a/code/renderergl2/tr_model.c b/code/renderergl2/tr_model.c index 1fa907a9..0fdf60d8 100644 --- a/code/renderergl2/tr_model.c +++ b/code/renderergl2/tr_model.c @@ -764,9 +764,6 @@ static qboolean R_LoadMD3(model_t * mod, int lod, void *buffer, int bufferSize, vaoSurf->numIndexes = surf->numIndexes; vaoSurf->numVerts = surf->numVerts; - vaoSurf->minIndex = 0; - vaoSurf->maxIndex = surf->numVerts - 1; - vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC); vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1; diff --git a/code/renderergl2/tr_shade.c b/code/renderergl2/tr_shade.c index eeeed94b..ac435fcb 100644 --- a/code/renderergl2/tr_shade.c +++ b/code/renderergl2/tr_shade.c @@ -41,42 +41,9 @@ R_DrawElements ================== */ -void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex ) +void R_DrawElements( int numIndexes, glIndex_t firstIndex) { - if (glRefConfig.drawRangeElements) - qglDrawRangeElements(GL_TRIANGLES, minIndex, maxIndex, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t))); - else - qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t))); - -} - - -static void R_DrawMultiElementsVao( int multiDrawPrimitives, glIndex_t *multiDrawMinIndex, glIndex_t *multiDrawMaxIndex, - GLsizei *multiDrawNumIndexes, glIndex_t **multiDrawFirstIndex) -{ - if (glRefConfig.multiDrawArrays && multiDrawPrimitives > 1) - { - qglMultiDrawElements(GL_TRIANGLES, multiDrawNumIndexes, GL_INDEX_TYPE, (const GLvoid **)multiDrawFirstIndex, multiDrawPrimitives); - } - else - { - int i; - - if (glRefConfig.drawRangeElements) - { - for (i = 0; i < multiDrawPrimitives; i++) - { - qglDrawRangeElements(GL_TRIANGLES, multiDrawMinIndex[i], multiDrawMaxIndex[i], multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]); - } - } - else - { - for (i = 0; i < multiDrawPrimitives; i++) - { - qglDrawElements(GL_TRIANGLES, multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]); - } - } - } + qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t))); } @@ -149,14 +116,7 @@ static void DrawTris (shaderCommands_t *input) { VectorSet4(color, 1, 1, 1, 1); GLSL_SetUniformVec4(sp, UNIFORM_COLOR, color); - if (input->multiDrawPrimitives) - { - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex); - } + R_DrawElements(input->numIndexes, input->firstIndex); } qglDepthRange( 0, 1 ); @@ -190,7 +150,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) { tess.numIndexes = 0; tess.firstIndex = 0; tess.numVertexes = 0; - tess.multiDrawPrimitives = 0; tess.shader = state; tess.fogNum = fogNum; tess.cubemapIndex = cubemapIndex; @@ -200,6 +159,7 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) { tess.numPasses = state->numUnfoggedPasses; tess.currentStageIteratorFunc = state->optimalStageIteratorFunc; tess.useInternalVao = qtrue; + tess.useCacheVao = qfalse; tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset; if (tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) { @@ -423,15 +383,7 @@ static void ProjectDlightTexture( void ) { GL_State( GLS_ATEST_GT_0 | GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL ); } - if (tess.multiDrawPrimitives) - { - shaderCommands_t *input = &tess; - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex); - } + R_DrawElements(tess.numIndexes, tess.firstIndex); backEnd.pc.c_totalIndexes += tess.numIndexes; backEnd.pc.c_dlightIndexes += tess.numIndexes; @@ -844,14 +796,7 @@ static void ForwardDlight( void ) { // draw // - if (input->multiDrawPrimitives) - { - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex); - } + R_DrawElements(input->numIndexes, input->firstIndex); backEnd.pc.c_totalIndexes += tess.numIndexes; backEnd.pc.c_dlightIndexes += tess.numIndexes; @@ -920,14 +865,7 @@ static void ProjectPshadowVBOGLSL( void ) { // draw // - if (input->multiDrawPrimitives) - { - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex); - } + R_DrawElements(input->numIndexes, input->firstIndex); backEnd.pc.c_totalIndexes += tess.numIndexes; //backEnd.pc.c_dlightIndexes += tess.numIndexes; @@ -1002,15 +940,7 @@ static void RB_FogPass( void ) { GL_State( GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA ); } - if (tess.multiDrawPrimitives) - { - shaderCommands_t *input = &tess; - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex); - } + R_DrawElements(tess.numIndexes, tess.firstIndex); } @@ -1391,14 +1321,7 @@ static void RB_IterateStagesGeneric( shaderCommands_t *input ) // // draw // - if (input->multiDrawPrimitives) - { - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex); - } + R_DrawElements(input->numIndexes, input->firstIndex); // allow skipping out to show just lightmaps during development if ( r_lightmap->integer && ( pStage->bundle[0].isLightmap || pStage->bundle[1].isLightmap ) ) @@ -1455,14 +1378,7 @@ static void RB_RenderShadowmap( shaderCommands_t *input ) // draw // - if (input->multiDrawPrimitives) - { - R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex); - } - else - { - R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex); - } + R_DrawElements(input->numIndexes, input->firstIndex); } } } @@ -1627,7 +1543,6 @@ void RB_StageIteratorGeneric( void ) } } - /* ** RB_EndSurface */ @@ -1657,6 +1572,12 @@ void RB_EndSurface( void ) { return; } + if (tess.useCacheVao) + { + // upload indexes now + VaoCache_Commit(); + } + // // update performance counters // @@ -1683,7 +1604,6 @@ void RB_EndSurface( void ) { tess.numIndexes = 0; tess.numVertexes = 0; tess.firstIndex = 0; - tess.multiDrawPrimitives = 0; GLimp_LogComment( "----------\n" ); } diff --git a/code/renderergl2/tr_sky.c b/code/renderergl2/tr_sky.c index 1ff0c0e8..dd1fe536 100644 --- a/code/renderergl2/tr_sky.c +++ b/code/renderergl2/tr_sky.c @@ -366,8 +366,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max int s, t; int firstVertex = tess.numVertexes; //int firstIndex = tess.numIndexes; - int minIndex = tess.minIndex; - int maxIndex = tess.maxIndex; vec4_t color; //tess.numVertexes = 0; @@ -417,9 +415,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max } } - tess.minIndex = firstVertex; - tess.maxIndex = tess.numVertexes; - // FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD); /* @@ -465,7 +460,7 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector); } - R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex); + R_DrawElements(tess.numIndexes - tess.firstIndex, tess.firstIndex); //qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t))); @@ -475,8 +470,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max tess.numIndexes = tess.firstIndex; tess.numVertexes = firstVertex; tess.firstIndex = 0; - tess.minIndex = minIndex; - tess.maxIndex = maxIndex; } static void DrawSkyBox( shader_t *shader ) diff --git a/code/renderergl2/tr_surface.c b/code/renderergl2/tr_surface.c index f624a139..0fc0e0aa 100644 --- a/code/renderergl2/tr_surface.c +++ b/code/renderergl2/tr_surface.c @@ -68,7 +68,7 @@ void RB_CheckOverflow( int verts, int indexes ) { void RB_CheckVao(vao_t *vao) { - if (vao != glState.currentVao || tess.multiDrawPrimitives >= MAX_MULTIDRAW_PRIMITIVES) + if (vao != glState.currentVao) { RB_EndSurface(); RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex); @@ -208,18 +208,14 @@ void RB_InstantQuad2(vec4_t quadVerts[4], vec2_t texCoords[4]) tess.indexes[tess.numIndexes++] = 0; tess.indexes[tess.numIndexes++] = 2; tess.indexes[tess.numIndexes++] = 3; - tess.minIndex = 0; - tess.maxIndex = 3; RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD); - R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex); + R_DrawElements(tess.numIndexes, tess.firstIndex); tess.numIndexes = 0; tess.numVertexes = 0; tess.firstIndex = 0; - tess.minIndex = 0; - tess.maxIndex = 0; } @@ -410,11 +406,53 @@ static void RB_SurfaceVertsAndIndexes( int numVerts, srfVert_t *verts, int numIn tess.numVertexes += numVerts; } -static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int minIndex, int maxIndex, int dlightBits, int pshadowBits, qboolean shaderCheck) +static qboolean RB_SurfaceVaoCached(int numVerts, srfVert_t *verts, int numIndexes, glIndex_t *indexes, int dlightBits, int pshadowBits) { - int i, mergeForward, mergeBack; - GLvoid *firstIndexOffset, *lastIndexOffset; + qboolean recycleVertexBuffer = qfalse; + qboolean recycleIndexBuffer = qfalse; + qboolean endSurface = qfalse; + if (!(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal)) + return qfalse; + + if (!numIndexes || !numVerts) + return qfalse; + + VaoCache_BindVao(); + + tess.dlightBits |= dlightBits; + tess.pshadowBits |= pshadowBits; + + VaoCache_CheckAdd(&endSurface, &recycleVertexBuffer, &recycleIndexBuffer, numVerts, numIndexes); + + if (endSurface) + { + RB_EndSurface(); + RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex); + } + + if (recycleVertexBuffer) + VaoCache_RecycleVertexBuffer(); + + if (recycleIndexBuffer) + VaoCache_RecycleIndexBuffer(); + + if (!tess.numVertexes) + VaoCache_InitQueue(); + + VaoCache_AddSurface(verts, numVerts, indexes, numIndexes); + + tess.numIndexes += numIndexes; + tess.numVertexes += numVerts; + tess.useInternalVao = qfalse; + tess.useCacheVao = qtrue; + + return qtrue; +} + + +static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int dlightBits, int pshadowBits, qboolean shaderCheck) +{ if (!vao) { return qfalse; @@ -430,98 +468,26 @@ static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firs tess.dlightBits |= dlightBits; tess.pshadowBits |= pshadowBits; - // merge this into any existing multidraw primitives - mergeForward = -1; - mergeBack = -1; - firstIndexOffset = BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)); - lastIndexOffset = BUFFER_OFFSET((firstIndex + numIndexes) * sizeof(glIndex_t)); + RB_EndSurface(); + RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex); - if (tess.multiDrawPrimitives && r_mergeMultidraws->integer) - { - i = 0; + backEnd.pc.c_staticVaoDraws++; - if (r_mergeMultidraws->integer == 1) - { - // lazy merge, only check the last primitive - i = tess.multiDrawPrimitives - 1; - } - - for (; i < tess.multiDrawPrimitives; i++) - { - if (firstIndexOffset == tess.multiDrawFirstIndex[i] + tess.multiDrawNumIndexes[i]) - { - mergeBack = i; - - if (mergeForward != -1) - break; - } - - if (lastIndexOffset == tess.multiDrawFirstIndex[i]) - { - mergeForward = i; - - if (mergeBack != -1) - break; - } - } - } - - if (mergeBack != -1 && mergeForward == -1) - { - tess.multiDrawNumIndexes[mergeBack] += numIndexes; - tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], minIndex); - tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], maxIndex); - backEnd.pc.c_multidrawsMerged++; - } - else if (mergeBack == -1 && mergeForward != -1) - { - tess.multiDrawNumIndexes[mergeForward] += numIndexes; - tess.multiDrawFirstIndex[mergeForward] = firstIndexOffset; - tess.multiDrawMinIndex[mergeForward] = MIN(tess.multiDrawMinIndex[mergeForward], minIndex); - tess.multiDrawMaxIndex[mergeForward] = MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex); - backEnd.pc.c_multidrawsMerged++; - } - else if (mergeBack != -1 && mergeForward != -1) - { - tess.multiDrawNumIndexes[mergeBack] += numIndexes + tess.multiDrawNumIndexes[mergeForward]; - tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], MIN(tess.multiDrawMinIndex[mergeForward], minIndex)); - tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex)); - tess.multiDrawPrimitives--; - - if (mergeForward != tess.multiDrawPrimitives) - { - tess.multiDrawNumIndexes[mergeForward] = tess.multiDrawNumIndexes[tess.multiDrawPrimitives]; - tess.multiDrawFirstIndex[mergeForward] = tess.multiDrawFirstIndex[tess.multiDrawPrimitives]; - tess.multiDrawMinIndex[mergeForward] = tess.multiDrawMinIndex[tess.multiDrawPrimitives]; - tess.multiDrawMaxIndex[mergeForward] = tess.multiDrawMaxIndex[tess.multiDrawPrimitives]; - } - backEnd.pc.c_multidrawsMerged += 2; - } - else //if (mergeBack == -1 && mergeForward == -1) - { - tess.multiDrawNumIndexes[tess.multiDrawPrimitives] = numIndexes; - tess.multiDrawFirstIndex[tess.multiDrawPrimitives] = firstIndexOffset; - tess.multiDrawMinIndex[tess.multiDrawPrimitives] = minIndex; - tess.multiDrawMaxIndex[tess.multiDrawPrimitives] = maxIndex; - tess.multiDrawPrimitives++; - } - - backEnd.pc.c_multidraws++; - - tess.numIndexes += numIndexes; - tess.numVertexes += numVerts; + tess.numIndexes = numIndexes; + tess.numVertexes = numVerts; return qtrue; } + /* ============= RB_SurfaceTriangles ============= */ static void RB_SurfaceTriangles( srfBspSurface_t *srf ) { - if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, - srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) ) + if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes, + srf->indexes, srf->dlightBits, srf->pshadowBits)) { return; } @@ -584,8 +550,6 @@ static void RB_SurfaceBeam( void ) tess.numVertexes = 0; tess.numIndexes = 0; tess.firstIndex = 0; - tess.minIndex = 0; - tess.maxIndex = 0; for ( i = 0; i <= NUM_BEAM_SEGS; i++ ) { VectorCopy(start_points[ i % NUM_BEAM_SEGS ], tess.xyz[tess.numVertexes++]); @@ -602,9 +566,6 @@ static void RB_SurfaceBeam( void ) tess.indexes[tess.numIndexes++] = 1 + (i + 1) * 2; } - tess.minIndex = 0; - tess.maxIndex = tess.numVertexes; - // FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function RB_UpdateTessVao(ATTR_POSITION); @@ -614,13 +575,11 @@ static void RB_SurfaceBeam( void ) GLSL_SetUniformVec4(sp, UNIFORM_COLOR, colorRed); - R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex); + R_DrawElements(tess.numIndexes, tess.firstIndex); tess.numIndexes = 0; tess.numVertexes = 0; tess.firstIndex = 0; - tess.minIndex = 0; - tess.maxIndex = 0; } //================================================================================ @@ -960,8 +919,8 @@ RB_SurfaceFace ============== */ static void RB_SurfaceFace( srfBspSurface_t *srf ) { - if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, - srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) ) + if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes, + srf->indexes, srf->dlightBits, srf->pshadowBits)) { return; } @@ -1028,8 +987,8 @@ static void RB_SurfaceGrid( srfBspSurface_t *srf ) { int pshadowBits; //int *vDlightBits; - if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, - srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) ) + if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes, + srf->indexes, srf->dlightBits, srf->pshadowBits)) { return; } @@ -1280,7 +1239,7 @@ static void RB_SurfaceFlare(srfFlare_t *surf) static void RB_SurfaceVaoMesh(srfBspSurface_t * srf) { RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, srf->firstIndex, - srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qfalse ); + srf->dlightBits, srf->pshadowBits, qfalse ); } void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface) @@ -1310,8 +1269,6 @@ void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface) tess.numIndexes = surface->numIndexes; tess.numVertexes = surface->numVerts; - tess.minIndex = surface->minIndex; - tess.maxIndex = surface->maxIndex; //mdvModel = surface->mdvModel; //mdvSurface = surface->mdvSurface; diff --git a/code/renderergl2/tr_vbo.c b/code/renderergl2/tr_vbo.c index 0ac63a40..e4d0ca37 100644 --- a/code/renderergl2/tr_vbo.c +++ b/code/renderergl2/tr_vbo.c @@ -499,6 +499,8 @@ void R_InitVaos(void) R_BindNullVao(); + VaoCache_Init(); + GL_CheckErrors(); } @@ -650,3 +652,315 @@ void RB_UpdateTessVao(unsigned int attribBits) qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, tess.numIndexes * sizeof(tess.indexes[0]), tess.indexes); } } + +// FIXME: This sets a limit of 65536 verts/262144 indexes per static surface +// This is higher than the old vq3 limits but is worth noting +#define VAOCACHE_QUEUE_MAX_SURFACES (1 << 10) +#define VAOCACHE_QUEUE_MAX_VERTEXES (1 << 16) +#define VAOCACHE_QUEUE_MAX_INDEXES (VAOCACHE_QUEUE_MAX_VERTEXES * 4) + +typedef struct queuedSurface_s +{ + srfVert_t *vertexes; + int numVerts; + glIndex_t *indexes; + int numIndexes; +} +queuedSurface_t; + +static struct +{ + queuedSurface_t surfaces[VAOCACHE_QUEUE_MAX_SURFACES]; + int numSurfaces; + + srfVert_t vertexes[VAOCACHE_QUEUE_MAX_VERTEXES]; + int vertexCommitSize; + + glIndex_t indexes[VAOCACHE_QUEUE_MAX_INDEXES]; + int indexCommitSize; +} +vcq; + +#define VAOCACHE_MAX_SURFACES (1 << 16) +#define VAOCACHE_MAX_BATCHES (1 << 10) + +// srfVert_t is 60 bytes +// assuming each vert is referenced 4 times, need 16 bytes (4 glIndex_t) per vert +// -> need about 4/15ths the space for indexes as vertexes +#define VAOCACHE_VERTEX_BUFFER_SIZE (16 * 1024 * 1024) +#define VAOCACHE_INDEX_BUFFER_SIZE (5 * 1024 * 1024) + +typedef struct buffered_s +{ + void *data; + int size; + int bufferOffset; +} +buffered_t; + +static struct +{ + vao_t *vao; + buffered_t surfaceIndexSets[VAOCACHE_MAX_SURFACES]; + int numSurfaces; + + int batchLengths[VAOCACHE_MAX_BATCHES]; + int numBatches; + + int vertexOffset; + int indexOffset; +} +vc; + +void VaoCache_Commit(void) +{ + buffered_t *indexSet; + int *batchLength; + queuedSurface_t *surf, *end = vcq.surfaces + vcq.numSurfaces; + + R_BindVao(vc.vao); + + // Search for a matching batch + // FIXME: Use faster search + indexSet = vc.surfaceIndexSets; + batchLength = vc.batchLengths; + for (; batchLength < vc.batchLengths + vc.numBatches; batchLength++) + { + if (*batchLength == vcq.numSurfaces) + { + buffered_t *indexSet2 = indexSet; + for (surf = vcq.surfaces; surf < end; surf++, indexSet2++) + { + if (surf->indexes != indexSet2->data || (surf->numIndexes * sizeof(glIndex_t)) != indexSet2->size) + break; + } + + if (surf == end) + break; + } + + indexSet += *batchLength; + } + + // If found, use it + if (indexSet < vc.surfaceIndexSets + vc.numSurfaces) + { + tess.firstIndex = indexSet->bufferOffset / sizeof(glIndex_t); + //ri.Printf(PRINT_ALL, "firstIndex %d numIndexes %d as %d\n", tess.firstIndex, tess.numIndexes, batchLength - vc.batchLengths); + //ri.Printf(PRINT_ALL, "vc.numSurfaces %d vc.numBatches %d\n", vc.numSurfaces, vc.numBatches); + } + // If not, rebuffer the batch + // FIXME: keep track of the vertexes so we don't have to reupload them every time + else + { + srfVert_t *dstVertex = vcq.vertexes; + glIndex_t *dstIndex = vcq.indexes; + + batchLength = vc.batchLengths + vc.numBatches; + *batchLength = vcq.numSurfaces; + vc.numBatches++; + + tess.firstIndex = vc.indexOffset / sizeof(glIndex_t); + vcq.vertexCommitSize = 0; + vcq.indexCommitSize = 0; + for (surf = vcq.surfaces; surf < end; surf++) + { + glIndex_t *srcIndex = surf->indexes; + int vertexesSize = surf->numVerts * sizeof(srfVert_t); + int indexesSize = surf->numIndexes * sizeof(glIndex_t); + int i, indexOffset = (vc.vertexOffset + vcq.vertexCommitSize) / sizeof(srfVert_t); + + Com_Memcpy(dstVertex, surf->vertexes, vertexesSize); + dstVertex += surf->numVerts; + + vcq.vertexCommitSize += vertexesSize; + + indexSet = vc.surfaceIndexSets + vc.numSurfaces; + indexSet->data = surf->indexes; + indexSet->size = indexesSize; + indexSet->bufferOffset = vc.indexOffset + vcq.indexCommitSize; + vc.numSurfaces++; + + for (i = 0; i < surf->numIndexes; i++) + *dstIndex++ = *srcIndex++ + indexOffset; + + vcq.indexCommitSize += indexesSize; + } + + //ri.Printf(PRINT_ALL, "committing %d to %d, %d to %d as %d\n", vcq.vertexCommitSize, vc.vertexOffset, vcq.indexCommitSize, vc.indexOffset, batchLength - vc.batchLengths); + + if (vcq.vertexCommitSize) + { + qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO); + qglBufferSubData(GL_ARRAY_BUFFER, vc.vertexOffset, vcq.vertexCommitSize, vcq.vertexes); + vc.vertexOffset += vcq.vertexCommitSize; + } + + if (vcq.indexCommitSize) + { + qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO); + qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, vc.indexOffset, vcq.indexCommitSize, vcq.indexes); + vc.indexOffset += vcq.indexCommitSize; + } + } +} + +void VaoCache_Init(void) +{ + srfVert_t vert; + int dataSize; + + vc.vao = R_CreateVao("VaoCache", NULL, VAOCACHE_VERTEX_BUFFER_SIZE, NULL, VAOCACHE_INDEX_BUFFER_SIZE, VAO_USAGE_DYNAMIC); + + vc.vao->attribs[ATTR_INDEX_POSITION].enabled = 1; + vc.vao->attribs[ATTR_INDEX_TEXCOORD].enabled = 1; + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].enabled = 1; + vc.vao->attribs[ATTR_INDEX_NORMAL].enabled = 1; + vc.vao->attribs[ATTR_INDEX_TANGENT].enabled = 1; + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].enabled = 1; + vc.vao->attribs[ATTR_INDEX_COLOR].enabled = 1; + + vc.vao->attribs[ATTR_INDEX_POSITION].count = 3; + vc.vao->attribs[ATTR_INDEX_TEXCOORD].count = 2; + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].count = 2; + vc.vao->attribs[ATTR_INDEX_NORMAL].count = 4; + vc.vao->attribs[ATTR_INDEX_TANGENT].count = 4; + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].count = 4; + vc.vao->attribs[ATTR_INDEX_COLOR].count = 4; + + vc.vao->attribs[ATTR_INDEX_POSITION].type = GL_FLOAT; + vc.vao->attribs[ATTR_INDEX_TEXCOORD].type = GL_FLOAT; + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].type = GL_FLOAT; + vc.vao->attribs[ATTR_INDEX_NORMAL].type = GL_SHORT; + vc.vao->attribs[ATTR_INDEX_TANGENT].type = GL_SHORT; + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].type = GL_SHORT; + vc.vao->attribs[ATTR_INDEX_COLOR].type = GL_UNSIGNED_SHORT; + + vc.vao->attribs[ATTR_INDEX_POSITION].normalized = GL_FALSE; + vc.vao->attribs[ATTR_INDEX_TEXCOORD].normalized = GL_FALSE; + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].normalized = GL_FALSE; + vc.vao->attribs[ATTR_INDEX_NORMAL].normalized = GL_TRUE; + vc.vao->attribs[ATTR_INDEX_TANGENT].normalized = GL_TRUE; + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].normalized = GL_TRUE; + vc.vao->attribs[ATTR_INDEX_COLOR].normalized = GL_TRUE; + + vc.vao->attribs[ATTR_INDEX_POSITION].offset = 0; dataSize = sizeof(vert.xyz); + vc.vao->attribs[ATTR_INDEX_TEXCOORD].offset = dataSize; dataSize += sizeof(vert.st); + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].offset = dataSize; dataSize += sizeof(vert.lightmap); + vc.vao->attribs[ATTR_INDEX_NORMAL].offset = dataSize; dataSize += sizeof(vert.normal); + vc.vao->attribs[ATTR_INDEX_TANGENT].offset = dataSize; dataSize += sizeof(vert.tangent); + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].offset = dataSize; dataSize += sizeof(vert.lightdir); + vc.vao->attribs[ATTR_INDEX_COLOR].offset = dataSize; dataSize += sizeof(vert.color); + + vc.vao->attribs[ATTR_INDEX_POSITION].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_TEXCOORD].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_NORMAL].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_TANGENT].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].stride = dataSize; + vc.vao->attribs[ATTR_INDEX_COLOR].stride = dataSize; + + Vao_SetVertexPointers(vc.vao); + + vc.numSurfaces = 0; + vc.numBatches = 0; + vc.vertexOffset = 0; + vc.indexOffset = 0; + vcq.vertexCommitSize = 0; + vcq.indexCommitSize = 0; + vcq.numSurfaces = 0; +} + +void VaoCache_BindVao(void) +{ + R_BindVao(vc.vao); +} + +void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes) +{ + int vertexesSize = sizeof(srfVert_t) * numVerts; + int indexesSize = sizeof(glIndex_t) * numIndexes; + + if (vc.vao->vertexesSize < vc.vertexOffset + vcq.vertexCommitSize + vertexesSize) + { + //ri.Printf(PRINT_ALL, "out of space in vertex cache: %d < %d + %d + %d\n", vc.vao->vertexesSize, vc.vertexOffset, vc.vertexCommitSize, vertexesSize); + *recycleVertexBuffer = qtrue; + *recycleIndexBuffer = qtrue; + *endSurface = qtrue; + } + + if (vc.vao->indexesSize < vc.indexOffset + vcq.indexCommitSize + indexesSize) + { + //ri.Printf(PRINT_ALL, "out of space in index cache\n"); + *recycleIndexBuffer = qtrue; + *endSurface = qtrue; + } + + if (vc.numSurfaces + vcq.numSurfaces >= VAOCACHE_MAX_SURFACES) + { + //ri.Printf(PRINT_ALL, "out of surfaces in index cache\n"); + *recycleIndexBuffer = qtrue; + *endSurface = qtrue; + } + + if (vc.numBatches >= VAOCACHE_MAX_BATCHES) + { + //ri.Printf(PRINT_ALL, "out of batches in index cache\n"); + *recycleIndexBuffer = qtrue; + *endSurface = qtrue; + } + + if (vcq.numSurfaces >= VAOCACHE_QUEUE_MAX_SURFACES) + { + //ri.Printf(PRINT_ALL, "out of queued surfaces\n"); + *endSurface = qtrue; + } + + if (VAOCACHE_QUEUE_MAX_VERTEXES * sizeof(srfVert_t) < vcq.vertexCommitSize + vertexesSize) + { + //ri.Printf(PRINT_ALL, "out of queued vertexes\n"); + *endSurface = qtrue; + } + + if (VAOCACHE_QUEUE_MAX_INDEXES * sizeof(glIndex_t) < vcq.indexCommitSize + indexesSize) + { + //ri.Printf(PRINT_ALL, "out of queued indexes\n"); + *endSurface = qtrue; + } +} + +void VaoCache_RecycleVertexBuffer(void) +{ + qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO); + qglBufferData(GL_ARRAY_BUFFER, vc.vao->vertexesSize, NULL, GL_DYNAMIC_DRAW); + vc.vertexOffset = 0; +} + +void VaoCache_RecycleIndexBuffer(void) +{ + qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO); + qglBufferData(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesSize, NULL, GL_DYNAMIC_DRAW); + vc.indexOffset = 0; + vc.numSurfaces = 0; + vc.numBatches = 0; +} + +void VaoCache_InitQueue(void) +{ + vcq.vertexCommitSize = 0; + vcq.indexCommitSize = 0; + vcq.numSurfaces = 0; +} + +void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes) +{ + queuedSurface_t *queueEntry = vcq.surfaces + vcq.numSurfaces; + queueEntry->vertexes = verts; + queueEntry->numVerts = numVerts; + queueEntry->indexes = indexes; + queueEntry->numIndexes = numIndexes; + vcq.numSurfaces++; + + vcq.vertexCommitSize += sizeof(srfVert_t) * numVerts;; + vcq.indexCommitSize += sizeof(glIndex_t) * numIndexes; +} diff --git a/code/renderergl2/tr_world.c b/code/renderergl2/tr_world.c index c4b15339..3821bee2 100644 --- a/code/renderergl2/tr_world.c +++ b/code/renderergl2/tr_world.c @@ -36,7 +36,7 @@ static qboolean R_CullSurface( msurface_t *surf ) { return qfalse; } - if ( *surf->data == SF_GRID && r_nocurves->integer ) { + if ( r_nocurves->integer && *surf->data == SF_GRID ) { return qtrue; } @@ -561,43 +561,23 @@ static void R_RecursiveWorldNode( mnode_t *node, uint32_t planeBits, uint32_t dl tr.viewParms.visBounds[1][2] = node->maxs[2]; } - // add merged and unmerged surfaces - if (tr.world->viewSurfaces && !r_nocurves->integer) - view = tr.world->viewSurfaces + node->firstmarksurface; - else - view = tr.world->marksurfaces + node->firstmarksurface; + // add surfaces + view = tr.world->marksurfaces + node->firstmarksurface; c = node->nummarksurfaces; while (c--) { // just mark it as visible, so we don't jump out of the cache derefencing the surface surf = *view; - if (surf < 0) + if (tr.world->surfacesViewCount[surf] != tr.viewCount) { - if (tr.world->mergedSurfacesViewCount[-surf - 1] != tr.viewCount) - { - tr.world->mergedSurfacesViewCount[-surf - 1] = tr.viewCount; - tr.world->mergedSurfacesDlightBits[-surf - 1] = dlightBits; - tr.world->mergedSurfacesPshadowBits[-surf - 1] = pshadowBits; - } - else - { - tr.world->mergedSurfacesDlightBits[-surf - 1] |= dlightBits; - tr.world->mergedSurfacesPshadowBits[-surf - 1] |= pshadowBits; - } + tr.world->surfacesViewCount[surf] = tr.viewCount; + tr.world->surfacesDlightBits[surf] = dlightBits; + tr.world->surfacesPshadowBits[surf] = pshadowBits; } else { - if (tr.world->surfacesViewCount[surf] != tr.viewCount) - { - tr.world->surfacesViewCount[surf] = tr.viewCount; - tr.world->surfacesDlightBits[surf] = dlightBits; - tr.world->surfacesPshadowBits[surf] = pshadowBits; - } - else - { - tr.world->surfacesDlightBits[surf] |= dlightBits; - tr.world->surfacesPshadowBits[surf] |= pshadowBits; - } + tr.world->surfacesDlightBits[surf] |= dlightBits; + tr.world->surfacesPshadowBits[surf] |= pshadowBits; } view++; } @@ -825,14 +805,6 @@ void R_AddWorldSurfaces (void) { R_AddWorldSurface( tr.world->surfaces + i, tr.world->surfacesDlightBits[i], tr.world->surfacesPshadowBits[i] ); tr.refdef.dlightMask |= tr.world->surfacesDlightBits[i]; } - for (i = 0; i < tr.world->numMergedSurfaces; i++) - { - if (tr.world->mergedSurfacesViewCount[i] != tr.viewCount) - continue; - - R_AddWorldSurface( tr.world->mergedSurfaces + i, tr.world->mergedSurfacesDlightBits[i], tr.world->mergedSurfacesPshadowBits[i] ); - tr.refdef.dlightMask |= tr.world->mergedSurfacesDlightBits[i]; - } tr.refdef.dlightMask = ~tr.refdef.dlightMask; }