diff --git a/doc/04_cvarlist.md b/doc/04_cvarlist.md index 39189bd5..c2f01221 100644 --- a/doc/04_cvarlist.md +++ b/doc/04_cvarlist.md @@ -177,6 +177,12 @@ it's `+set busywait 0` (setting the `busywait` cvar) and `-portable` and `16`. Anisotropic filtering gives a huge improvement to texture quality by a negligible performance impact. +* **gl_fixsurfsky**: Some maps misuse sky surfaces for interior + lightning. The original renderer had a bug that made such surfaces + mess up the lightning of entities near them. If set to `0` (the + default) the bug is there and maps look like their developers + intended. If set to `1` the bug is fixed and the lightning correct. + * **gl_msaa_samples**: Full scene anti aliasing samples. The number of samples depends on the GPU driver, most drivers support at least `2`, `4` and `8` samples. If an invalid value is set, the value is reverted diff --git a/src/client/refresh/gl1/gl1_model.c b/src/client/refresh/gl1/gl1_model.c index 64fe649d..c92031f9 100644 --- a/src/client/refresh/gl1/gl1_model.c +++ b/src/client/refresh/gl1/gl1_model.c @@ -577,6 +577,8 @@ Mod_LoadFaces(lump_t *l) int planenum, side; int ti; + cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE); + in = (void *)(mod_base + l->fileofs); if (l->filelen % sizeof(*in)) @@ -655,9 +657,16 @@ Mod_LoadFaces(lump_t *l) R_SubdivideSurface(out); /* cut up polygon for warps */ } + if (gl_fixsurfsky->value) + { + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } + } + /* create lightmaps and polygons */ - if (!(out->texinfo->flags & - (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) + if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) { LM_CreateSurfaceLightmap(out); } diff --git a/src/client/refresh/gl3/gl3_light.c b/src/client/refresh/gl3/gl3_light.c index 629cd907..4ec6343c 100644 --- a/src/client/refresh/gl3/gl3_light.c +++ b/src/client/refresh/gl3/gl3_light.c @@ -233,10 +233,10 @@ RecursiveLightPoint(mnode_t *node, vec3_t start, vec3_t end) for (maps = 0; maps < MAX_LIGHTMAPS_PER_SURFACE && surf->styles[maps] != 255; maps++) { - for (i = 0; i < 3; i++) + for (int j = 0; j < 3; j++) { - scale[i] = r_modulate->value * - gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[i]; + scale[j] = r_modulate->value * + gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[j]; } pointcolor[0] += lightmap[0] * scale[0] * (1.0 / 255); diff --git a/src/client/refresh/gl3/gl3_main.c b/src/client/refresh/gl3/gl3_main.c index d65c481c..83c0eb14 100644 --- a/src/client/refresh/gl3/gl3_main.c +++ b/src/client/refresh/gl3/gl3_main.c @@ -121,6 +121,7 @@ cvar_t *r_modulate; cvar_t *gl_lightmap; cvar_t *gl_shadows; cvar_t *gl3_debugcontext; +cvar_t *gl3_usebigvbo; // Yaw-Pitch-Roll // equivalent to R_z * R_y * R_x where R_x is the trans matrix for rotating around X axis for aroundXdeg @@ -206,6 +207,11 @@ GL3_Register(void) gl3_particle_fade_factor = ri.Cvar_Get("gl3_particle_fade_factor", "1.2", CVAR_ARCHIVE); gl3_particle_square = ri.Cvar_Get("gl3_particle_square", "0", CVAR_ARCHIVE); + // 0: use lots of calls to glBufferData() + // 1: reduce calls to glBufferData() with one big VBO (see GL3_BufferAndDraw3D()) + // -1: auto (let yq2 choose to enable/disable this based on detected driver) + gl3_usebigvbo = ri.Cvar_Get("gl3_usebigvbo", "-1", CVAR_ARCHIVE); + r_norefresh = ri.Cvar_Get("r_norefresh", "0", 0); r_drawentities = ri.Cvar_Get("r_drawentities", "1", 0); r_drawworld = ri.Cvar_Get("r_drawworld", "1", 0); @@ -530,6 +536,47 @@ GL3_Init(void) R_Printf(PRINT_ALL, " - OpenGL Debug Output: Not Supported\n"); } + gl3config.useBigVBO = false; + if(gl3_usebigvbo->value == 1.0f) + { + R_Printf(PRINT_ALL, "Enabling useBigVBO workaround because gl3_usebigvbo = 1\n"); + gl3config.useBigVBO = true; + } + else if(gl3_usebigvbo->value == -1.0f) + { + // enable for AMDs proprietary Windows and Linux drivers +#ifdef _WIN32 + if(gl3config.version_string != NULL && gl3config.vendor_string != NULL + && strstr(gl3config.vendor_string, "ATI Technologies Inc") != NULL) + { + int a, b, ver; + if(sscanf(gl3config.version_string, " %d.%d.%d ", &a, &b, &ver) >= 3 && ver >= 13431) + { + // turns out the legacy driver is a lot faster *without* the workaround :-/ + // GL_VERSION for legacy 16.2.1 Beta driver: 3.2.13399 Core Profile Forward-Compatible Context 15.200.1062.1004 + // (this is the last version that supports the Radeon HD 6950) + // GL_VERSION for (non-legacy) 16.3.1 driver on Radeon R9 200: 4.5.13431 Compatibility Profile Context 16.150.2111.0 + // GL_VERSION for non-legacy 17.7.2 WHQL driver: 4.5.13491 Compatibility Profile/Debug Context 22.19.662.4 + // GL_VERSION for 18.10.1 driver: 4.6.13541 Compatibility Profile/Debug Context 25.20.14003.1010 + // GL_VERSION for (current) 19.3.2 driver: 4.6.13547 Compatibility Profile/Debug Context 25.20.15027.5007 + // (the 3.2/4.5/4.6 can probably be ignored, might depend on the card and what kind of context was requested + // but AFAIK the number behind that can be used to roughly match the driver version) + // => let's try matching for x.y.z with z >= 13431 + // (no, I don't feel like testing which release since 16.2.1 has introduced the slowdown.) + R_Printf(PRINT_ALL, "Detected AMD Windows GPU driver, enabling useBigVBO workaround\n"); + gl3config.useBigVBO = true; + } + } +#elif defined(__linux__) + if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "Advanced Micro Devices, Inc.") != NULL) + { + R_Printf(PRINT_ALL, "Detected proprietary AMD GPU driver, enabling useBigVBO workaround\n"); + R_Printf(PRINT_ALL, "(consider using the open source RadeonSI drivers, they tend to work better overall)\n"); + gl3config.useBigVBO = true; + } +#endif + } + // generate texture handles for all possible lightmaps glGenTextures(MAX_LIGHTMAPS*MAX_LIGHTMAPS_PER_SURFACE, gl3state.lightmap_textureIDs[0]); @@ -583,6 +630,80 @@ GL3_Shutdown(void) GL3_ShutdownContext(); } +// assumes gl3state.v[ab]o3D are bound +// buffers and draws gl3_3D_vtx_t vertices +// drawMode is something like GL_TRIANGLE_STRIP or GL_TRIANGLE_FAN or whatever +void +GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode) +{ + if(!gl3config.useBigVBO) + { + glBufferData( GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numVerts, verts, GL_STREAM_DRAW ); + glDrawArrays( drawMode, 0, numVerts ); + } + else // gl3config.useBigVBO == true + { + /* + * For some reason, AMD's Windows driver doesn't seem to like lots of + * calls to glBufferData() (some of them seem to take very long then). + * GL3_BufferAndDraw3D() is called a lot when drawing world geometry + * (once for each visible face I think?). + * The simple code above caused noticeable slowdowns - even a fast + * quadcore CPU and a Radeon RX580 weren't able to maintain 60fps.. + * The workaround is to not call glBufferData() with small data all the time, + * but to allocate a big buffer and on each call to GL3_BufferAndDraw3D() + * to use a different region of that buffer, resulting in a lot less calls + * to glBufferData() (=> a lot less buffer allocations in the driver). + * Only when the buffer is full and at the end of a frame (=> GL3_EndFrame()) + * we get a fresh buffer. + * + * BTW, we couldn't observe this kind of problem with any other driver: + * Neither nvidias driver, nor AMDs or Intels Open Source Linux drivers, + * not even Intels Windows driver seem to care that much about the + * glBufferData() calls.. However, at least nvidias driver doesn't like + * this workaround (with glMapBufferRange()), the framerate dropped + * significantly - that's why both methods are available and + * selectable at runtime. + */ +#if 0 + // I /think/ doing it with glBufferSubData() didn't really help + const int bufSize = gl3state.vbo3Dsize; + int neededSize = numVerts*sizeof(gl3_3D_vtx_t); + int curOffset = gl3state.vbo3DcurOffset; + if(curOffset + neededSize > gl3state.vbo3Dsize) + curOffset = 0; + int curIdx = curOffset / sizeof(gl3_3D_vtx_t); + + gl3state.vbo3DcurOffset = curOffset + neededSize; + + glBufferSubData( GL_ARRAY_BUFFER, curOffset, neededSize, verts ); + glDrawArrays( drawMode, curIdx, numVerts ); +#else + int curOffset = gl3state.vbo3DcurOffset; + int neededSize = numVerts*sizeof(gl3_3D_vtx_t); + if(curOffset+neededSize > gl3state.vbo3Dsize) + { + // buffer is full, need to start again from the beginning + // => need to sync or get fresh buffer + // (getting fresh buffer seems easier) + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); + curOffset = 0; + } + + // as we make sure to use a previously unused part of the buffer, + // doing it unsynchronized should be safe.. + GLbitfield accessBits = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT; + void* data = glMapBufferRange(GL_ARRAY_BUFFER, curOffset, neededSize, accessBits); + memcpy(data, verts, neededSize); + glUnmapBuffer(GL_ARRAY_BUFFER); + + glDrawArrays(drawMode, curOffset/sizeof(gl3_3D_vtx_t), numVerts); + + gl3state.vbo3DcurOffset = curOffset + neededSize; // TODO: padding or sth needed? +#endif + } +} + static void GL3_DrawBeam(entity_t *e) { @@ -658,8 +779,7 @@ GL3_DrawBeam(entity_t *e) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STREAM_DRAW); - glDrawArrays( GL_TRIANGLE_STRIP, 0, NUM_BEAM_SEGS*4 ); + GL3_BufferAndDraw3D(verts, NUM_BEAM_SEGS*4, GL_TRIANGLE_STRIP); glDisable(GL_BLEND); glDepthMask(GL_TRUE); @@ -734,8 +854,7 @@ GL3_DrawSpriteModel(entity_t *e) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, 4*sizeof(gl3_3D_vtx_t), verts, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + GL3_BufferAndDraw3D(verts, 4, GL_TRIANGLE_FAN); if (alpha != 1.0F) { @@ -779,16 +898,14 @@ GL3_DrawNullModel(void) {{16 * cos( 4 * M_PI / 2 ), 16 * sin( 4 * M_PI / 2 ), 0}, {0,0}, {0,0}} }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vtxA), vtxA, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 6); + GL3_BufferAndDraw3D(vtxA, 6, GL_TRIANGLE_FAN); gl3_3D_vtx_t vtxB[6] = { {{0, 0, 16}, {0,0}, {0,0}}, vtxA[5], vtxA[4], vtxA[3], vtxA[2], vtxA[1] }; - glBufferData(GL_ARRAY_BUFFER, sizeof(vtxB), vtxB, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 6); + GL3_BufferAndDraw3D(vtxB, 6, GL_TRIANGLE_FAN); gl3state.uni3DData.transModelMat4 = origModelMat; GL3_UpdateUBO3D(); diff --git a/src/client/refresh/gl3/gl3_model.c b/src/client/refresh/gl3/gl3_model.c index 27ddb630..6964e50c 100644 --- a/src/client/refresh/gl3/gl3_model.c +++ b/src/client/refresh/gl3/gl3_model.c @@ -463,6 +463,8 @@ Mod_LoadFaces(lump_t *l) int planenum, side; int ti; + cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE); + in = (void *)(mod_base + l->fileofs); if (l->filelen % sizeof(*in)) @@ -541,6 +543,14 @@ Mod_LoadFaces(lump_t *l) GL3_SubdivideSurface(out, loadmodel); /* cut up polygon for warps */ } + if (gl_fixsurfsky->value) + { + if (out->texinfo->flags & SURF_SKY) + { + out->flags |= SURF_DRAWSKY; + } + } + /* create lightmaps and polygons */ if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP))) { diff --git a/src/client/refresh/gl3/gl3_sdl.c b/src/client/refresh/gl3/gl3_sdl.c index cc89c71b..75296c6f 100644 --- a/src/client/refresh/gl3/gl3_sdl.c +++ b/src/client/refresh/gl3/gl3_sdl.c @@ -98,6 +98,15 @@ DebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei le */ void GL3_EndFrame(void) { + if(gl3config.useBigVBO) + { + // I think this is a good point to orphan the VBO and get a fresh one + GL3_BindVAO(gl3state.vao3D); + GL3_BindVBO(gl3state.vbo3D); + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); + gl3state.vbo3DcurOffset = 0; + } + SDL_GL_SwapWindow(window); } diff --git a/src/client/refresh/gl3/gl3_shaders.c b/src/client/refresh/gl3/gl3_shaders.c index 38e4162d..19925e72 100644 --- a/src/client/refresh/gl3/gl3_shaders.c +++ b/src/client/refresh/gl3/gl3_shaders.c @@ -626,7 +626,7 @@ static const char* fragmentSrcAlias = MULTILINE_STRING( // apply gamma correction and intensity texel.rgb *= intensity; texel.a *= alpha; // is alpha even used here? - texel *= min(vec4(3.0), passColor); + texel *= min(vec4(1.5), passColor); outColor.rgb = pow(texel.rgb, vec3(gamma)); outColor.a = texel.a; // I think alpha shouldn't be modified by gamma and intensity diff --git a/src/client/refresh/gl3/gl3_surf.c b/src/client/refresh/gl3/gl3_surf.c index 95835189..a256cb67 100644 --- a/src/client/refresh/gl3/gl3_surf.c +++ b/src/client/refresh/gl3/gl3_surf.c @@ -44,7 +44,7 @@ extern int numgl3textures; void GL3_SurfInit(void) { // init the VAO and VBO for the standard vertexdata: 10 floats and 1 uint - // (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) - last two groups for lightmap/dynlights + // (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) ; lightFlags - last two groups for lightmap/dynlights glGenVertexArrays(1, &gl3state.vao3D); GL3_BindVAO(gl3state.vao3D); @@ -52,6 +52,13 @@ void GL3_SurfInit(void) glGenBuffers(1, &gl3state.vbo3D); GL3_BindVBO(gl3state.vbo3D); + if(gl3config.useBigVBO) + { + gl3state.vbo3Dsize = 5*1024*1024; // a 5MB buffer seems to work well? + gl3state.vbo3DcurOffset = 0; + glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); // allocate/reserve that data + } + glEnableVertexAttribArray(GL3_ATTRIB_POSITION); qglVertexAttribPointer(GL3_ATTRIB_POSITION, 3, GL_FLOAT, GL_FALSE, sizeof(gl3_3D_vtx_t), 0); @@ -212,9 +219,8 @@ GL3_DrawGLPoly(msurface_t *fa) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts); + GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN); } void @@ -241,8 +247,7 @@ GL3_DrawGLFlowingPoly(msurface_t *fa) GL3_BindVAO(gl3state.vao3D); GL3_BindVBO(gl3state.vbo3D); - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts); + GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN); } static void diff --git a/src/client/refresh/gl3/gl3_warp.c b/src/client/refresh/gl3/gl3_warp.c index aa721d8c..5062231b 100644 --- a/src/client/refresh/gl3/gl3_warp.c +++ b/src/client/refresh/gl3/gl3_warp.c @@ -255,9 +255,7 @@ GL3_EmitWaterPolys(msurface_t *fa) for (bp = fa->polys; bp != NULL; bp = bp->next) { - int numverts = bp->numverts; - glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numverts, bp->vertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, numverts); + GL3_BufferAndDraw3D(bp->vertices, bp->numverts, GL_TRIANGLE_FAN); } } @@ -726,8 +724,7 @@ GL3_DrawSkyBox(void) MakeSkyVec( skymaxs [ 0 ] [ i ], skymaxs [ 1 ] [ i ], i, &skyVertices[2] ); MakeSkyVec( skymaxs [ 0 ] [ i ], skymins [ 1 ] [ i ], i, &skyVertices[3] ); - glBufferData(GL_ARRAY_BUFFER, sizeof(skyVertices), skyVertices, GL_STREAM_DRAW); - glDrawArrays(GL_TRIANGLE_FAN, 0, 4); + GL3_BufferAndDraw3D(skyVertices, 4, GL_TRIANGLE_FAN); } // glPopMatrix(); diff --git a/src/client/refresh/gl3/header/local.h b/src/client/refresh/gl3/header/local.h index a94e8a3d..e1be4489 100644 --- a/src/client/refresh/gl3/header/local.h +++ b/src/client/refresh/gl3/header/local.h @@ -106,6 +106,8 @@ typedef struct qboolean debug_output; // is GL_ARB_debug_output supported? qboolean stencil; // Do we have a stencil buffer? + qboolean useBigVBO; // workaround for AMDs windows driver for fewer calls to glBufferData() + // ---- float max_anisotropy; @@ -225,7 +227,12 @@ typedef struct // NOTE: make sure siParticle is always the last shaderInfo (or adapt GL3_ShutdownShaders()) gl3ShaderInfo_t siParticle; // for particles. surprising, right? - GLuint vao3D, vbo3D; // for brushes etc, using 1 floats as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ) + GLuint vao3D, vbo3D; // for brushes etc, using 10 floats and one uint as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ ; lightFlags) + + // the next two are for gl3config.useBigVBO == true + int vbo3Dsize; + int vbo3DcurOffset; + GLuint vaoAlias, vboAlias, eboAlias; // for models, using 9 floats as (x,y,z, s,t, r,g,b,a) GLuint vaoParticle, vboParticle; // for particles, using 9 floats (x,y,z, size,distance, r,g,b,a) @@ -350,6 +357,8 @@ GL3_BindEBO(GLuint ebo) } } +extern void GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode); + extern qboolean GL3_CullBox(vec3_t mins, vec3_t maxs); extern void GL3_RotateForEntity(entity_t *e);