From 325e893376bef7db54ff9cf04911e7eba0ef03af Mon Sep 17 00:00:00 2001 From: Yamagi Burmeister Date: Wed, 1 May 2019 19:13:56 +0200 Subject: [PATCH 1/7] Don't create lightmaps and set SURF_DRAWSKY for SURF_SKY surfaces. The software renderer already did this, but not the GL renderers. Maybe the logic was lost somewhere on the long way... Without this change a fullbright lightmap is generated for SURF_SKY surfaces and without the SURF_DRAWSKY flags the surfaces aren't skipped in RecursiveLightPoint() and GL3_LM_CreateSurfaceLightmap(). This isn't a problem under real skyboxes, but in cases were SURF_SKY is abused fpr interior lightning. rmine2.bsp in rogue is a good place to see the problem Reported by @m-x-d, fixes #393. --- src/client/refresh/gl1/gl1_model.c | 8 ++++++-- src/client/refresh/gl3/gl3_model.c | 5 +++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/client/refresh/gl1/gl1_model.c b/src/client/refresh/gl1/gl1_model.c index 64fe649d..0af3fe76 100644 --- a/src/client/refresh/gl1/gl1_model.c +++ b/src/client/refresh/gl1/gl1_model.c @@ -655,9 +655,13 @@ Mod_LoadFaces(lump_t *l) R_SubdivideSurface(out); /* cut up polygon for warps */ } + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } + /* create lightmaps and polygons */ - if (!(out->texinfo->flags & - (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) + if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) { LM_CreateSurfaceLightmap(out); } diff --git a/src/client/refresh/gl3/gl3_model.c b/src/client/refresh/gl3/gl3_model.c index 27ddb630..535325d4 100644 --- a/src/client/refresh/gl3/gl3_model.c +++ b/src/client/refresh/gl3/gl3_model.c @@ -541,6 +541,11 @@ Mod_LoadFaces(lump_t *l) GL3_SubdivideSurface(out, loadmodel); /* cut up polygon for warps */ } + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } + /* create lightmaps and polygons */ if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) { From 401ec04691ead017f5a1c27fcd8ade1fb8f094af Mon Sep 17 00:00:00 2001 From: Yamagi Burmeister Date: Sat, 4 May 2019 17:23:20 +0200 Subject: [PATCH 2/7] Make the SURF_DRAWSKY fix committed in 325e893 optional. The developers tested their maps without the fix and decided that it looked good. Add a new cvar gl_fixsurfsky defaulting to 0 that enables the fix if someone really want it. --- src/client/refresh/gl1/gl1_model.c | 9 +++++++-- src/client/refresh/gl3/gl3_model.c | 9 +++++++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/src/client/refresh/gl1/gl1_model.c b/src/client/refresh/gl1/gl1_model.c index 0af3fe76..c92031f9 100644 --- a/src/client/refresh/gl1/gl1_model.c +++ b/src/client/refresh/gl1/gl1_model.c @@ -577,6 +577,8 @@ Mod_LoadFaces(lump_t *l) int planenum, side; int ti; + cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE); + in = (void *)(mod_base + l->fileofs); if (l->filelen % sizeof(*in)) @@ -655,9 +657,12 @@ Mod_LoadFaces(lump_t *l) R_SubdivideSurface(out); /* cut up polygon for warps */ } - if (out->texinfo->flags & SURF_SKY) + if (gl_fixsurfsky->value) { - out->flags |= SURF_DRAWSKY; + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } } /* create lightmaps and polygons */ diff --git a/src/client/refresh/gl3/gl3_model.c b/src/client/refresh/gl3/gl3_model.c index 535325d4..6964e50c 100644 --- a/src/client/refresh/gl3/gl3_model.c +++ b/src/client/refresh/gl3/gl3_model.c @@ -463,6 +463,8 @@ Mod_LoadFaces(lump_t *l) int planenum, side; int ti; + cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE); + in = (void *)(mod_base + l->fileofs); if (l->filelen % sizeof(*in)) @@ -541,9 +543,12 @@ Mod_LoadFaces(lump_t *l) GL3_SubdivideSurface(out, loadmodel); /* cut up polygon for warps */ } - if (out->texinfo->flags & SURF_SKY) + if (gl_fixsurfsky->value) { - out->flags |= SURF_DRAWSKY; + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } } /* create lightmaps and polygons */ From 44e8088f7d034e426be8ba634a5f78e6b4fd3e3b Mon Sep 17 00:00:00 2001 From: Yamagi Burmeister Date: Sat, 4 May 2019 17:35:19 +0200 Subject: [PATCH 3/7] Document gl_fixsurfsky. --- doc/04_cvarlist.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/04_cvarlist.md b/doc/04_cvarlist.md index 39189bd5..c2f01221 100644 --- a/doc/04_cvarlist.md +++ b/doc/04_cvarlist.md @@ -177,6 +177,12 @@ it's `+set busywait 0` (setting the `busywait` cvar) and `-portable` and `16`. Anisotropic filtering gives a huge improvement to texture quality by a negligible performance impact. +* **gl_fixsurfsky**: Some maps misuse sky surfaces for interior + lightning. The original renderer had a bug that made such surfaces + mess up the lightning of entities near them. If set to `0` (the + default) the bug is there and maps look like their developers + intended. If set to `1` the bug is fixed and the lightning correct. + * **gl_msaa_samples**: Full scene anti aliasing samples. The number of samples depends on the GPU driver, most drivers support at least `2`, `4` and `8` samples. If an invalid value is set, the value is reverted From 7b4dc000ad7ec326a3d1b699cca821b6331f010e Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Mon, 1 Apr 2019 17:13:24 +0200 Subject: [PATCH 4/7] Unify buffering data and drawing with gl3state.v[ab]ao3D use GL3_BufferAndDraw3D() instead of glBufferData() and glDrawArrays() in each place it's needed. This by itself doesn't make anything faster, but it will make trying out different ways to upload data easier. --- src/client/refresh/gl3/gl3_main.c | 24 ++++++++++++++++-------- src/client/refresh/gl3/gl3_surf.c | 8 +++----- src/client/refresh/gl3/gl3_warp.c | 7 ++----- src/client/refresh/gl3/header/local.h | 4 +++- 4 files changed, 24 insertions(+), 19 deletions(-) diff --git a/src/client/refresh/gl3/gl3_main.c b/src/client/refresh/gl3/gl3_main.c index d65c481c..676af929 100644 --- a/src/client/refresh/gl3/gl3_main.c +++ b/src/client/refresh/gl3/gl3_main.c @@ -583,6 +583,18 @@ GL3_Shutdown(void) GL3_ShutdownContext(); } +// assumes gl3state.v[ab]o3D are bound +// buffers and draws gl3_3D_vtx_t vertices +// drawMode is something like GL_TRIANGLE_STRIP or GL_TRIANGLE_FAN or whatever +void +GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode) +{ + // TODO: do something more efficient, maybe with glMapBufferRange() + GL_MAP_UNSYNCHRONIZED_BIT + // and glBindBufferRange() + glBufferData( GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numVerts, verts, GL_STREAM_DRAW ); + glDrawArrays( drawMode, 0, numVerts ); +} + static void GL3_DrawBeam(entity_t *e) { @@ -658,8 +670,7 @@ GL3_DrawBeam(entity_t *e) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STREAM_DRAW); - glDrawArrays( GL_TRIANGLE_STRIP, 0, NUM_BEAM_SEGS*4 ); + GL3_BufferAndDraw3D(verts, NUM_BEAM_SEGS*4, GL_TRIANGLE_STRIP); glDisable(GL_BLEND); glDepthMask(GL_TRUE); @@ -734,8 +745,7 @@ GL3_DrawSpriteModel(entity_t *e) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, 4*sizeof(gl3_3D_vtx_t), verts, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + GL3_BufferAndDraw3D(verts, 4, GL_TRIANGLE_FAN); if (alpha != 1.0F) { @@ -779,16 +789,14 @@ GL3_DrawNullModel(void) {{16 * cos( 4 * M_PI / 2 ), 16 * sin( 4 * M_PI / 2 ), 0}, {0,0}, {0,0}} }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vtxA), vtxA, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 6); + GL3_BufferAndDraw3D(vtxA, 6, GL_TRIANGLE_FAN); gl3_3D_vtx_t vtxB[6] = { {{0, 0, 16}, {0,0}, {0,0}}, vtxA[5], vtxA[4], vtxA[3], vtxA[2], vtxA[1] }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vtxB), vtxB, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 6); + GL3_BufferAndDraw3D(vtxB, 6, GL_TRIANGLE_FAN); gl3state.uni3DData.transModelMat4 = origModelMat; GL3_UpdateUBO3D(); diff --git a/src/client/refresh/gl3/gl3_surf.c b/src/client/refresh/gl3/gl3_surf.c index 95835189..16d0be60 100644 --- a/src/client/refresh/gl3/gl3_surf.c +++ b/src/client/refresh/gl3/gl3_surf.c @@ -44,7 +44,7 @@ extern int numgl3textures; void GL3_SurfInit(void) { // init the VAO and VBO for the standard vertexdata: 10 floats and 1 uint - // (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) - last two groups for lightmap/dynlights + // (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) ; lightFlags - last two groups for lightmap/dynlights glGenVertexArrays(1, &gl3state.vao3D); GL3_BindVAO(gl3state.vao3D); @@ -212,9 +212,8 @@ GL3_DrawGLPoly(msurface_t *fa) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts); + GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN); } void @@ -241,8 +240,7 @@ GL3_DrawGLFlowingPoly(msurface_t *fa) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts); + GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN); } static void diff --git a/src/client/refresh/gl3/gl3_warp.c b/src/client/refresh/gl3/gl3_warp.c index aa721d8c..5062231b 100644 --- a/src/client/refresh/gl3/gl3_warp.c +++ b/src/client/refresh/gl3/gl3_warp.c @@ -255,9 +255,7 @@ GL3_EmitWaterPolys(msurface_t *fa) for (bp = fa->polys; bp != NULL; bp = bp->next) { - int numverts = bp->numverts; - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numverts, bp->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, numverts); + GL3_BufferAndDraw3D(bp->vertices, bp->numverts, GL_TRIANGLE_FAN); } } @@ -726,8 +724,7 @@ GL3_DrawSkyBox(void) MakeSkyVec( skymaxs [ 0 ] [ i ], skymaxs [ 1 ] [ i ], i, &skyVertices[2] ); MakeSkyVec( skymaxs [ 0 ] [ i ], skymins [ 1 ] [ i ], i, &skyVertices[3] ); - glBufferData(GL_ARRAY_BUFFER, sizeof(skyVertices), skyVertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + GL3_BufferAndDraw3D(skyVertices, 4, GL_TRIANGLE_FAN); } // glPopMatrix(); diff --git a/src/client/refresh/gl3/header/local.h b/src/client/refresh/gl3/header/local.h index a94e8a3d..3920aaa9 100644 --- a/src/client/refresh/gl3/header/local.h +++ b/src/client/refresh/gl3/header/local.h @@ -225,7 +225,7 @@ typedef struct // NOTE: make sure siParticle is always the last shaderInfo (or adapt GL3_ShutdownShaders()) gl3ShaderInfo_t siParticle; // for particles. surprising, right? - GLuint vao3D, vbo3D; // for brushes etc, using 1 floats as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ) + GLuint vao3D, vbo3D; // for brushes etc, using 10 floats and one uint as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ ; lightFlags) GLuint vaoAlias, vboAlias, eboAlias; // for models, using 9 floats as (x,y,z, s,t, r,g,b,a) GLuint vaoParticle, vboParticle; // for particles, using 9 floats (x,y,z, size,distance, r,g,b,a) @@ -350,6 +350,8 @@ GL3_BindEBO(GLuint ebo) } } +extern void GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode); + extern qboolean GL3_CullBox(vec3_t mins, vec3_t maxs); extern void GL3_RotateForEntity(entity_t *e); From 26a461575baa09c90b1285c556d2a63a3c2c43a6 Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Sun, 28 Apr 2019 06:14:50 +0200 Subject: [PATCH 5/7] Try to make GL3_BufferAndDraw3D() faster on AMD/Windows Seems like AMDs Windows driver doesn't like it when we call glBufferData() *a lot* (other drivers, incl. Intels, don't seem to care as much). Even on an i7-4771 with a Radeon RX 580 I couldn't get stable 60fps on Windows without this workaround (the open source Linux driver is ok). This workaround can be enabled/disabled with the gl3_usebigvbo cvar; by default it's -1 which means "enable if AMD driver is detected". Enabling it when using a nvidia GPU with their proprietary drivers reduces the performance to 1/3 of the fps we get without it, so it indeed needs to be conditional... --- src/client/refresh/gl3/gl3_main.c | 104 +++++++++++++++++++++++++- src/client/refresh/gl3/gl3_sdl.c | 9 +++ src/client/refresh/gl3/gl3_surf.c | 7 ++ src/client/refresh/gl3/header/local.h | 7 ++ 4 files changed, 123 insertions(+), 4 deletions(-) diff --git a/src/client/refresh/gl3/gl3_main.c b/src/client/refresh/gl3/gl3_main.c index 676af929..182410d7 100644 --- a/src/client/refresh/gl3/gl3_main.c +++ b/src/client/refresh/gl3/gl3_main.c @@ -121,6 +121,7 @@ cvar_t *r_modulate; cvar_t *gl_lightmap; cvar_t *gl_shadows; cvar_t *gl3_debugcontext; +cvar_t *gl3_usebigvbo; // Yaw-Pitch-Roll // equivalent to R_z * R_y * R_x where R_x is the trans matrix for rotating around X axis for aroundXdeg @@ -206,6 +207,11 @@ GL3_Register(void) gl3_particle_fade_factor = ri.Cvar_Get("gl3_particle_fade_factor", "1.2", CVAR_ARCHIVE); gl3_particle_square = ri.Cvar_Get("gl3_particle_square", "0", CVAR_ARCHIVE); + // 0: use lots of calls to glBufferData() + // 1: reduce calls to glBufferData() with one big VBO (see GL3_BufferAndDraw3D()) + // -1: auto (let yq2 choose to enable/disable this based on detected driver) + gl3_usebigvbo = ri.Cvar_Get("gl3_usebigvbo", "-1", CVAR_ARCHIVE); + r_norefresh = ri.Cvar_Get("r_norefresh", "0", 0); r_drawentities = ri.Cvar_Get("r_drawentities", "1", 0); r_drawworld = ri.Cvar_Get("r_drawworld", "1", 0); @@ -530,6 +536,34 @@ GL3_Init(void) R_Printf(PRINT_ALL, " - OpenGL Debug Output: Not Supported\n"); } + gl3config.useBigVBO = false; + if(gl3_usebigvbo->value == 1.0f) + { + R_Printf(PRINT_ALL, "Enabling useBigVBO workaround because gl3_usebigvbo = 1\n"); + gl3config.useBigVBO = true; + } + else if(gl3_usebigvbo->value == -1.0f) + { + // enable for AMDs proprietary Windows and Linux drivers + // TODO: should we match a version number? does the workaround + // slow down legacy drivers that work fine without it? + // This workaround is is tested with the following configuration: + // RX580, Win10, driver version "Adrenalin 2019 Edition 19.4.x" +#ifdef _WIN32 + if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "ATI") != NULL) + { + R_Printf(PRINT_ALL, "Detected AMD Windows GPU driver, enabling useBigVBO workaround\n"); + gl3config.useBigVBO = true; + } +#elif defined(__linux__) + if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "Advanced Micro Devices, Inc.") != NULL) + { + R_Printf(PRINT_ALL, "Detected proprietary AMD GPU driver, enabling useBigVBO workaround\n"); + gl3config.useBigVBO = true; + } +#endif + } + // generate texture handles for all possible lightmaps glGenTextures(MAX_LIGHTMAPS*MAX_LIGHTMAPS_PER_SURFACE, gl3state.lightmap_textureIDs[0]); @@ -589,10 +623,72 @@ GL3_Shutdown(void) void GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode) { - // TODO: do something more efficient, maybe with glMapBufferRange() + GL_MAP_UNSYNCHRONIZED_BIT - // and glBindBufferRange() - glBufferData( GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numVerts, verts, GL_STREAM_DRAW ); - glDrawArrays( drawMode, 0, numVerts ); + if(!gl3config.useBigVBO) + { + glBufferData( GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numVerts, verts, GL_STREAM_DRAW ); + glDrawArrays( drawMode, 0, numVerts ); + } + else // gl3config.useBigVBO == true + { + /* + * For some reason, AMD's Windows driver doesn't seem to like lots of + * calls to glBufferData() (some of them seem to take very long then). + * GL3_BufferAndDraw3D() is called a lot when drawing world geometry + * (once for each visible face I think?). + * The simple code above caused noticeable slowdowns - even a fast + * quadcore CPU and a Radeon RX580 weren't able to maintain 60fps.. + * The workaround is to not call glBufferData() with small data all the time, + * but to allocate a big buffer and on each call to GL3_BufferAndDraw3D() + * to use a different region of that buffer, resulting in a lot less calls + * to glBufferData() (=> a lot less buffer allocations in the driver). + * Only when the buffer is full and at the end of a frame (=> GL3_EndFrame()) + * we get a fresh buffer. + * + * BTW, we couldn't observe this kind of problem with any other driver: + * Neither nvidias driver, nor AMDs or Intels Open Source Linux drivers, + * not even Intels Windows driver seem to care that much about the + * glBufferData() calls.. However, at least nvidias driver doesn't like + * this workaround (with glMapBufferRange()), the framerate dropped + * significantly - that's why both methods are available and + * selectable at runtime. + */ +#if 0 + // I /think/ doing it with glBufferSubData() didn't really help + const int bufSize = gl3state.vbo3Dsize; + int neededSize = numVerts*sizeof(gl3_3D_vtx_t); + int curOffset = gl3state.vbo3DcurOffset; + if(curOffset + neededSize > gl3state.vbo3Dsize) + curOffset = 0; + int curIdx = curOffset / sizeof(gl3_3D_vtx_t); + + gl3state.vbo3DcurOffset = curOffset + neededSize; + + glBufferSubData( GL_ARRAY_BUFFER, curOffset, neededSize, verts ); + glDrawArrays( drawMode, curIdx, numVerts ); +#else + int curOffset = gl3state.vbo3DcurOffset; + int neededSize = numVerts*sizeof(gl3_3D_vtx_t); + if(curOffset+neededSize > gl3state.vbo3Dsize) + { + // buffer is full, need to start again from the beginning + // => need to sync or get fresh buffer + // (getting fresh buffer seems easier) + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); + curOffset = 0; + } + + // as we make sure to use a previously unused part of the buffer, + // doing it unsynchronized should be safe.. + GLbitfield accessBits = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; + void* data = glMapBufferRange(GL_ARRAY_BUFFER, curOffset, neededSize, accessBits); + memcpy(data, verts, neededSize); + glUnmapBuffer(GL_ARRAY_BUFFER); + + glDrawArrays(drawMode, curOffset/sizeof(gl3_3D_vtx_t), numVerts); + + gl3state.vbo3DcurOffset = curOffset + neededSize; // TODO: padding or sth needed? +#endif + } } static void diff --git a/src/client/refresh/gl3/gl3_sdl.c b/src/client/refresh/gl3/gl3_sdl.c index cc89c71b..75296c6f 100644 --- a/src/client/refresh/gl3/gl3_sdl.c +++ b/src/client/refresh/gl3/gl3_sdl.c @@ -98,6 +98,15 @@ DebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei le */ void GL3_EndFrame(void) { + if(gl3config.useBigVBO) + { + // I think this is a good point to orphan the VBO and get a fresh one + GL3_BindVAO(gl3state.vao3D); + GL3_BindVBO(gl3state.vbo3D); + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); + gl3state.vbo3DcurOffset = 0; + } + SDL_GL_SwapWindow(window); } diff --git a/src/client/refresh/gl3/gl3_surf.c b/src/client/refresh/gl3/gl3_surf.c index 16d0be60..a256cb67 100644 --- a/src/client/refresh/gl3/gl3_surf.c +++ b/src/client/refresh/gl3/gl3_surf.c @@ -52,6 +52,13 @@ void GL3_SurfInit(void) glGenBuffers(1, &gl3state.vbo3D); GL3_BindVBO(gl3state.vbo3D); + if(gl3config.useBigVBO) + { + gl3state.vbo3Dsize = 5*1024*1024; // a 5MB buffer seems to work well? + gl3state.vbo3DcurOffset = 0; + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); // allocate/reserve that data + } + glEnableVertexAttribArray(GL3_ATTRIB_POSITION); qglVertexAttribPointer(GL3_ATTRIB_POSITION, 3, GL_FLOAT, GL_FALSE, sizeof(gl3_3D_vtx_t), 0); diff --git a/src/client/refresh/gl3/header/local.h b/src/client/refresh/gl3/header/local.h index 3920aaa9..e1be4489 100644 --- a/src/client/refresh/gl3/header/local.h +++ b/src/client/refresh/gl3/header/local.h @@ -106,6 +106,8 @@ typedef struct qboolean debug_output; // is GL_ARB_debug_output supported? qboolean stencil; // Do we have a stencil buffer? + qboolean useBigVBO; // workaround for AMDs windows driver for fewer calls to glBufferData() + // ---- float max_anisotropy; @@ -226,6 +228,11 @@ typedef struct gl3ShaderInfo_t siParticle; // for particles. surprising, right? GLuint vao3D, vbo3D; // for brushes etc, using 10 floats and one uint as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ ; lightFlags) + + // the next two are for gl3config.useBigVBO == true + int vbo3Dsize; + int vbo3DcurOffset; + GLuint vaoAlias, vboAlias, eboAlias; // for models, using 9 floats as (x,y,z, s,t, r,g,b,a) GLuint vaoParticle, vboParticle; // for particles, using 9 floats (x,y,z, size,distance, r,g,b,a) From cdf533f995f2888368b240d843840af128ec4765 Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Sat, 4 May 2019 17:12:29 +0200 Subject: [PATCH 6/7] Fix overbright models in GL3, refs #393 also some dumb bug with using i in two nested loops --- src/client/refresh/gl3/gl3_light.c | 6 +++--- src/client/refresh/gl3/gl3_shaders.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/client/refresh/gl3/gl3_light.c b/src/client/refresh/gl3/gl3_light.c index 629cd907..4ec6343c 100644 --- a/src/client/refresh/gl3/gl3_light.c +++ b/src/client/refresh/gl3/gl3_light.c @@ -233,10 +233,10 @@ RecursiveLightPoint(mnode_t *node, vec3_t start, vec3_t end) for (maps = 0; maps < MAX_LIGHTMAPS_PER_SURFACE && surf->styles[maps] != 255; maps++) { - for (i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { - scale[i] = r_modulate->value * - gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[i]; + scale[j] = r_modulate->value * + gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[j]; } pointcolor[0] += lightmap[0] * scale[0] * (1.0 / 255); diff --git a/src/client/refresh/gl3/gl3_shaders.c b/src/client/refresh/gl3/gl3_shaders.c index 38e4162d..19925e72 100644 --- a/src/client/refresh/gl3/gl3_shaders.c +++ b/src/client/refresh/gl3/gl3_shaders.c @@ -626,7 +626,7 @@ static const char* fragmentSrcAlias = MULTILINE_STRING( // apply gamma correction and intensity texel.rgb *= intensity; texel.a *= alpha; // is alpha even used here? - texel *= min(vec4(3.0), passColor); + texel *= min(vec4(1.5), passColor); outColor.rgb = pow(texel.rgb, vec3(gamma)); outColor.a = texel.a; // I think alpha shouldn't be modified by gamma and intensity From 107d044da27eee1ad19d9e316cf5bd9faba58935 Mon Sep 17 00:00:00 2001 From: Daniel Gibson Date: Sat, 4 May 2019 17:19:42 +0200 Subject: [PATCH 7/7] Make AMD performance workaround conditional per driver version --- src/client/refresh/gl3/gl3_main.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/client/refresh/gl3/gl3_main.c b/src/client/refresh/gl3/gl3_main.c index 182410d7..83c0eb14 100644 --- a/src/client/refresh/gl3/gl3_main.c +++ b/src/client/refresh/gl3/gl3_main.c @@ -545,20 +545,33 @@ GL3_Init(void) else if(gl3_usebigvbo->value == -1.0f) { // enable for AMDs proprietary Windows and Linux drivers - // TODO: should we match a version number? does the workaround - // slow down legacy drivers that work fine without it? - // This workaround is is tested with the following configuration: - // RX580, Win10, driver version "Adrenalin 2019 Edition 19.4.x" #ifdef _WIN32 - if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "ATI") != NULL) + if(gl3config.version_string != NULL && gl3config.vendor_string != NULL + && strstr(gl3config.vendor_string, "ATI Technologies Inc") != NULL) { - R_Printf(PRINT_ALL, "Detected AMD Windows GPU driver, enabling useBigVBO workaround\n"); - gl3config.useBigVBO = true; + int a, b, ver; + if(sscanf(gl3config.version_string, " %d.%d.%d ", &a, &b, &ver) >= 3 && ver >= 13431) + { + // turns out the legacy driver is a lot faster *without* the workaround :-/ + // GL_VERSION for legacy 16.2.1 Beta driver: 3.2.13399 Core Profile Forward-Compatible Context 15.200.1062.1004 + // (this is the last version that supports the Radeon HD 6950) + // GL_VERSION for (non-legacy) 16.3.1 driver on Radeon R9 200: 4.5.13431 Compatibility Profile Context 16.150.2111.0 + // GL_VERSION for non-legacy 17.7.2 WHQL driver: 4.5.13491 Compatibility Profile/Debug Context 22.19.662.4 + // GL_VERSION for 18.10.1 driver: 4.6.13541 Compatibility Profile/Debug Context 25.20.14003.1010 + // GL_VERSION for (current) 19.3.2 driver: 4.6.13547 Compatibility Profile/Debug Context 25.20.15027.5007 + // (the 3.2/4.5/4.6 can probably be ignored, might depend on the card and what kind of context was requested + // but AFAIK the number behind that can be used to roughly match the driver version) + // => let's try matching for x.y.z with z >= 13431 + // (no, I don't feel like testing which release since 16.2.1 has introduced the slowdown.) + R_Printf(PRINT_ALL, "Detected AMD Windows GPU driver, enabling useBigVBO workaround\n"); + gl3config.useBigVBO = true; + } } #elif defined(__linux__) if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "Advanced Micro Devices, Inc.") != NULL) { R_Printf(PRINT_ALL, "Detected proprietary AMD GPU driver, enabling useBigVBO workaround\n"); + R_Printf(PRINT_ALL, "(consider using the open source RadeonSI drivers, they tend to work better overall)\n"); gl3config.useBigVBO = true; } #endif