mirror of
https://github.com/yquake2/yquake2remaster.git
synced 2025-01-19 16:00:56 +00:00
Merge pull request #395 from DanielGibson/faster-gl3
Make gl3 fast with AMDs Windows drivers, fix overbright models with SURF_DRAWSKY
This commit is contained in:
commit
d84bc47766
10 changed files with 187 additions and 25 deletions
|
@ -177,6 +177,12 @@ it's `+set busywait 0` (setting the `busywait` cvar) and `-portable`
|
|||
and `16`. Anisotropic filtering gives a huge improvement to texture
|
||||
quality at a negligible performance cost.
|
||||
|
||||
* **gl_fixsurfsky**: Some maps misuse sky surfaces for interior
|
||||
lighting. The original renderer had a bug that made such surfaces
|
||||
mess up the lighting of entities near them. If set to `0` (the
|
||||
default) the bug is there and maps look like their developers
|
||||
intended. If set to `1` the bug is fixed and the lighting is correct.
|
||||
|
||||
* **gl_msaa_samples**: Full scene anti aliasing samples. The number of
|
||||
samples depends on the GPU driver, most drivers support at least `2`,
|
||||
`4` and `8` samples. If an invalid value is set, the value is reverted
|
||||
|
|
|
@ -577,6 +577,8 @@ Mod_LoadFaces(lump_t *l)
|
|||
int planenum, side;
|
||||
int ti;
|
||||
|
||||
cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE);
|
||||
|
||||
in = (void *)(mod_base + l->fileofs);
|
||||
|
||||
if (l->filelen % sizeof(*in))
|
||||
|
@ -655,9 +657,16 @@ Mod_LoadFaces(lump_t *l)
|
|||
R_SubdivideSurface(out); /* cut up polygon for warps */
|
||||
}
|
||||
|
||||
if (gl_fixsurfsky->value)
|
||||
{
|
||||
if (out->texinfo->flags & SURF_SKY)
|
||||
{
|
||||
out->flags |= SURF_DRAWSKY;
|
||||
}
|
||||
}
|
||||
|
||||
/* create lightmaps and polygons */
|
||||
if (!(out->texinfo->flags &
|
||||
(SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP)))
|
||||
if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP)))
|
||||
{
|
||||
LM_CreateSurfaceLightmap(out);
|
||||
}
|
||||
|
|
|
@ -233,10 +233,10 @@ RecursiveLightPoint(mnode_t *node, vec3_t start, vec3_t end)
|
|||
for (maps = 0; maps < MAX_LIGHTMAPS_PER_SURFACE && surf->styles[maps] != 255;
|
||||
maps++)
|
||||
{
|
||||
for (i = 0; i < 3; i++)
|
||||
for (int j = 0; j < 3; j++)
|
||||
{
|
||||
scale[i] = r_modulate->value *
|
||||
gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[i];
|
||||
scale[j] = r_modulate->value *
|
||||
gl3_newrefdef.lightstyles[surf->styles[maps]].rgb[j];
|
||||
}
|
||||
|
||||
pointcolor[0] += lightmap[0] * scale[0] * (1.0 / 255);
|
||||
|
|
|
@ -121,6 +121,7 @@ cvar_t *r_modulate;
|
|||
cvar_t *gl_lightmap;
|
||||
cvar_t *gl_shadows;
|
||||
cvar_t *gl3_debugcontext;
|
||||
cvar_t *gl3_usebigvbo;
|
||||
|
||||
// Yaw-Pitch-Roll
|
||||
// equivalent to R_z * R_y * R_x where R_x is the trans matrix for rotating around X axis for aroundXdeg
|
||||
|
@ -206,6 +207,11 @@ GL3_Register(void)
|
|||
gl3_particle_fade_factor = ri.Cvar_Get("gl3_particle_fade_factor", "1.2", CVAR_ARCHIVE);
|
||||
gl3_particle_square = ri.Cvar_Get("gl3_particle_square", "0", CVAR_ARCHIVE);
|
||||
|
||||
// 0: use lots of calls to glBufferData()
|
||||
// 1: reduce calls to glBufferData() with one big VBO (see GL3_BufferAndDraw3D())
|
||||
// -1: auto (let yq2 choose to enable/disable this based on detected driver)
|
||||
gl3_usebigvbo = ri.Cvar_Get("gl3_usebigvbo", "-1", CVAR_ARCHIVE);
|
||||
|
||||
r_norefresh = ri.Cvar_Get("r_norefresh", "0", 0);
|
||||
r_drawentities = ri.Cvar_Get("r_drawentities", "1", 0);
|
||||
r_drawworld = ri.Cvar_Get("r_drawworld", "1", 0);
|
||||
|
@ -530,6 +536,47 @@ GL3_Init(void)
|
|||
R_Printf(PRINT_ALL, " - OpenGL Debug Output: Not Supported\n");
|
||||
}
|
||||
|
||||
gl3config.useBigVBO = false;
|
||||
if(gl3_usebigvbo->value == 1.0f)
|
||||
{
|
||||
R_Printf(PRINT_ALL, "Enabling useBigVBO workaround because gl3_usebigvbo = 1\n");
|
||||
gl3config.useBigVBO = true;
|
||||
}
|
||||
else if(gl3_usebigvbo->value == -1.0f)
|
||||
{
|
||||
// enable for AMDs proprietary Windows and Linux drivers
|
||||
#ifdef _WIN32
|
||||
if(gl3config.version_string != NULL && gl3config.vendor_string != NULL
|
||||
&& strstr(gl3config.vendor_string, "ATI Technologies Inc") != NULL)
|
||||
{
|
||||
int a, b, ver;
|
||||
if(sscanf(gl3config.version_string, " %d.%d.%d ", &a, &b, &ver) >= 3 && ver >= 13431)
|
||||
{
|
||||
// turns out the legacy driver is a lot faster *without* the workaround :-/
|
||||
// GL_VERSION for legacy 16.2.1 Beta driver: 3.2.13399 Core Profile Forward-Compatible Context 15.200.1062.1004
|
||||
// (this is the last version that supports the Radeon HD 6950)
|
||||
// GL_VERSION for (non-legacy) 16.3.1 driver on Radeon R9 200: 4.5.13431 Compatibility Profile Context 16.150.2111.0
|
||||
// GL_VERSION for non-legacy 17.7.2 WHQL driver: 4.5.13491 Compatibility Profile/Debug Context 22.19.662.4
|
||||
// GL_VERSION for 18.10.1 driver: 4.6.13541 Compatibility Profile/Debug Context 25.20.14003.1010
|
||||
// GL_VERSION for (current) 19.3.2 driver: 4.6.13547 Compatibility Profile/Debug Context 25.20.15027.5007
|
||||
// (the 3.2/4.5/4.6 can probably be ignored, might depend on the card and what kind of context was requested
|
||||
// but AFAIK the number behind that can be used to roughly match the driver version)
|
||||
// => let's try matching for x.y.z with z >= 13431
|
||||
// (no, I don't feel like testing which release since 16.2.1 has introduced the slowdown.)
|
||||
R_Printf(PRINT_ALL, "Detected AMD Windows GPU driver, enabling useBigVBO workaround\n");
|
||||
gl3config.useBigVBO = true;
|
||||
}
|
||||
}
|
||||
#elif defined(__linux__)
|
||||
if(gl3config.vendor_string != NULL && strstr(gl3config.vendor_string, "Advanced Micro Devices, Inc.") != NULL)
|
||||
{
|
||||
R_Printf(PRINT_ALL, "Detected proprietary AMD GPU driver, enabling useBigVBO workaround\n");
|
||||
R_Printf(PRINT_ALL, "(consider using the open source RadeonSI drivers, they tend to work better overall)\n");
|
||||
gl3config.useBigVBO = true;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// generate texture handles for all possible lightmaps
|
||||
glGenTextures(MAX_LIGHTMAPS*MAX_LIGHTMAPS_PER_SURFACE, gl3state.lightmap_textureIDs[0]);
|
||||
|
||||
|
@ -583,6 +630,80 @@ GL3_Shutdown(void)
|
|||
GL3_ShutdownContext();
|
||||
}
|
||||
|
||||
// assumes gl3state.v[ab]o3D are bound
|
||||
// buffers and draws gl3_3D_vtx_t vertices
|
||||
// drawMode is something like GL_TRIANGLE_STRIP or GL_TRIANGLE_FAN or whatever
|
||||
void
|
||||
GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode)
|
||||
{
|
||||
if(!gl3config.useBigVBO)
|
||||
{
|
||||
glBufferData( GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numVerts, verts, GL_STREAM_DRAW );
|
||||
glDrawArrays( drawMode, 0, numVerts );
|
||||
}
|
||||
else // gl3config.useBigVBO == true
|
||||
{
|
||||
/*
|
||||
* For some reason, AMD's Windows driver doesn't seem to like lots of
|
||||
* calls to glBufferData() (some of them seem to take very long then).
|
||||
* GL3_BufferAndDraw3D() is called a lot when drawing world geometry
|
||||
* (once for each visible face I think?).
|
||||
* The simple code above caused noticeable slowdowns - even a fast
|
||||
* quadcore CPU and a Radeon RX580 weren't able to maintain 60fps..
|
||||
* The workaround is to not call glBufferData() with small data all the time,
|
||||
* but to allocate a big buffer and on each call to GL3_BufferAndDraw3D()
|
||||
* to use a different region of that buffer, resulting in a lot less calls
|
||||
* to glBufferData() (=> a lot less buffer allocations in the driver).
|
||||
* Only when the buffer is full and at the end of a frame (=> GL3_EndFrame())
|
||||
* we get a fresh buffer.
|
||||
*
|
||||
* BTW, we couldn't observe this kind of problem with any other driver:
|
||||
* Neither nvidias driver, nor AMDs or Intels Open Source Linux drivers,
|
||||
* not even Intels Windows driver seem to care that much about the
|
||||
* glBufferData() calls.. However, at least nvidias driver doesn't like
|
||||
* this workaround (with glMapBufferRange()), the framerate dropped
|
||||
* significantly - that's why both methods are available and
|
||||
* selectable at runtime.
|
||||
*/
|
||||
#if 0
|
||||
// I /think/ doing it with glBufferSubData() didn't really help
|
||||
const int bufSize = gl3state.vbo3Dsize;
|
||||
int neededSize = numVerts*sizeof(gl3_3D_vtx_t);
|
||||
int curOffset = gl3state.vbo3DcurOffset;
|
||||
if(curOffset + neededSize > gl3state.vbo3Dsize)
|
||||
curOffset = 0;
|
||||
int curIdx = curOffset / sizeof(gl3_3D_vtx_t);
|
||||
|
||||
gl3state.vbo3DcurOffset = curOffset + neededSize;
|
||||
|
||||
glBufferSubData( GL_ARRAY_BUFFER, curOffset, neededSize, verts );
|
||||
glDrawArrays( drawMode, curIdx, numVerts );
|
||||
#else
|
||||
int curOffset = gl3state.vbo3DcurOffset;
|
||||
int neededSize = numVerts*sizeof(gl3_3D_vtx_t);
|
||||
if(curOffset+neededSize > gl3state.vbo3Dsize)
|
||||
{
|
||||
// buffer is full, need to start again from the beginning
|
||||
// => need to sync or get fresh buffer
|
||||
// (getting fresh buffer seems easier)
|
||||
glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW);
|
||||
curOffset = 0;
|
||||
}
|
||||
|
||||
// as we make sure to use a previously unused part of the buffer,
|
||||
// doing it unsynchronized should be safe..
|
||||
GLbitfield accessBits = GL_MAP_WRITE_BIT | GL_MAP_INVALIDATE_RANGE_BIT | GL_MAP_UNSYNCHRONIZED_BIT;
|
||||
void* data = glMapBufferRange(GL_ARRAY_BUFFER, curOffset, neededSize, accessBits);
|
||||
memcpy(data, verts, neededSize);
|
||||
glUnmapBuffer(GL_ARRAY_BUFFER);
|
||||
|
||||
glDrawArrays(drawMode, curOffset/sizeof(gl3_3D_vtx_t), numVerts);
|
||||
|
||||
gl3state.vbo3DcurOffset = curOffset + neededSize; // TODO: padding or sth needed?
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
GL3_DrawBeam(entity_t *e)
|
||||
{
|
||||
|
@ -658,8 +779,7 @@ GL3_DrawBeam(entity_t *e)
|
|||
GL3_BindVAO(gl3state.vao3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(verts), verts, GL_STREAM_DRAW);
|
||||
glDrawArrays( GL_TRIANGLE_STRIP, 0, NUM_BEAM_SEGS*4 );
|
||||
GL3_BufferAndDraw3D(verts, NUM_BEAM_SEGS*4, GL_TRIANGLE_STRIP);
|
||||
|
||||
glDisable(GL_BLEND);
|
||||
glDepthMask(GL_TRUE);
|
||||
|
@ -734,8 +854,7 @@ GL3_DrawSpriteModel(entity_t *e)
|
|||
GL3_BindVAO(gl3state.vao3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, 4*sizeof(gl3_3D_vtx_t), verts, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
GL3_BufferAndDraw3D(verts, 4, GL_TRIANGLE_FAN);
|
||||
|
||||
if (alpha != 1.0F)
|
||||
{
|
||||
|
@ -779,16 +898,14 @@ GL3_DrawNullModel(void)
|
|||
{{16 * cos( 4 * M_PI / 2 ), 16 * sin( 4 * M_PI / 2 ), 0}, {0,0}, {0,0}}
|
||||
};
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(vtxA), vtxA, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, 6);
|
||||
GL3_BufferAndDraw3D(vtxA, 6, GL_TRIANGLE_FAN);
|
||||
|
||||
gl3_3D_vtx_t vtxB[6] = {
|
||||
{{0, 0, 16}, {0,0}, {0,0}},
|
||||
vtxA[5], vtxA[4], vtxA[3], vtxA[2], vtxA[1]
|
||||
};
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(vtxB), vtxB, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, 6);
|
||||
GL3_BufferAndDraw3D(vtxB, 6, GL_TRIANGLE_FAN);
|
||||
|
||||
gl3state.uni3DData.transModelMat4 = origModelMat;
|
||||
GL3_UpdateUBO3D();
|
||||
|
|
|
@ -463,6 +463,8 @@ Mod_LoadFaces(lump_t *l)
|
|||
int planenum, side;
|
||||
int ti;
|
||||
|
||||
cvar_t* gl_fixsurfsky = ri.Cvar_Get("gl_fixsurfsky", "0", CVAR_ARCHIVE);
|
||||
|
||||
in = (void *)(mod_base + l->fileofs);
|
||||
|
||||
if (l->filelen % sizeof(*in))
|
||||
|
@ -541,6 +543,14 @@ Mod_LoadFaces(lump_t *l)
|
|||
GL3_SubdivideSurface(out, loadmodel); /* cut up polygon for warps */
|
||||
}
|
||||
|
||||
if (gl_fixsurfsky->value)
|
||||
{
|
||||
if (out->texinfo->flags & SURF_SKY)
|
||||
{
|
||||
out->flags |= SURF_DRAWSKY;
|
||||
}
|
||||
}
|
||||
|
||||
/* create lightmaps and polygons */
|
||||
if (!(out->texinfo->flags & (SURF_SKY | SURF_TRANS33 | SURF_TRANS66 | SURF_WARP)))
|
||||
{
|
||||
|
|
|
@ -98,6 +98,15 @@ DebugCallback(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei le
|
|||
*/
|
||||
void GL3_EndFrame(void)
|
||||
{
|
||||
if(gl3config.useBigVBO)
|
||||
{
|
||||
// I think this is a good point to orphan the VBO and get a fresh one
|
||||
GL3_BindVAO(gl3state.vao3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW);
|
||||
gl3state.vbo3DcurOffset = 0;
|
||||
}
|
||||
|
||||
SDL_GL_SwapWindow(window);
|
||||
}
|
||||
|
||||
|
|
|
@ -626,7 +626,7 @@ static const char* fragmentSrcAlias = MULTILINE_STRING(
|
|||
// apply gamma correction and intensity
|
||||
texel.rgb *= intensity;
|
||||
texel.a *= alpha; // is alpha even used here?
|
||||
texel *= min(vec4(3.0), passColor);
|
||||
texel *= min(vec4(1.5), passColor);
|
||||
|
||||
outColor.rgb = pow(texel.rgb, vec3(gamma));
|
||||
outColor.a = texel.a; // I think alpha shouldn't be modified by gamma and intensity
|
||||
|
|
|
@ -44,7 +44,7 @@ extern int numgl3textures;
|
|||
void GL3_SurfInit(void)
|
||||
{
|
||||
// init the VAO and VBO for the standard vertexdata: 10 floats and 1 uint
|
||||
// (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) - last two groups for lightmap/dynlights
|
||||
// (X, Y, Z), (S, T), (LMS, LMT), (normX, normY, normZ) ; lightFlags - last two groups for lightmap/dynlights
|
||||
|
||||
glGenVertexArrays(1, &gl3state.vao3D);
|
||||
GL3_BindVAO(gl3state.vao3D);
|
||||
|
@ -52,6 +52,13 @@ void GL3_SurfInit(void)
|
|||
glGenBuffers(1, &gl3state.vbo3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
|
||||
if(gl3config.useBigVBO)
|
||||
{
|
||||
gl3state.vbo3Dsize = 5*1024*1024; // a 5MB buffer seems to work well?
|
||||
gl3state.vbo3DcurOffset = 0;
|
||||
glBufferData(GL_ARRAY_BUFFER, gl3state.vbo3Dsize, NULL, GL_STREAM_DRAW); // allocate/reserve that data
|
||||
}
|
||||
|
||||
glEnableVertexAttribArray(GL3_ATTRIB_POSITION);
|
||||
qglVertexAttribPointer(GL3_ATTRIB_POSITION, 3, GL_FLOAT, GL_FALSE, sizeof(gl3_3D_vtx_t), 0);
|
||||
|
||||
|
@ -212,9 +219,8 @@ GL3_DrawGLPoly(msurface_t *fa)
|
|||
|
||||
GL3_BindVAO(gl3state.vao3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW);
|
||||
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts);
|
||||
GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -241,8 +247,7 @@ GL3_DrawGLFlowingPoly(msurface_t *fa)
|
|||
GL3_BindVAO(gl3state.vao3D);
|
||||
GL3_BindVBO(gl3state.vbo3D);
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*p->numverts, p->vertices, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, p->numverts);
|
||||
GL3_BufferAndDraw3D(p->vertices, p->numverts, GL_TRIANGLE_FAN);
|
||||
}
|
||||
|
||||
static void
|
||||
|
|
|
@ -255,9 +255,7 @@ GL3_EmitWaterPolys(msurface_t *fa)
|
|||
|
||||
for (bp = fa->polys; bp != NULL; bp = bp->next)
|
||||
{
|
||||
int numverts = bp->numverts;
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(gl3_3D_vtx_t)*numverts, bp->vertices, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, numverts);
|
||||
GL3_BufferAndDraw3D(bp->vertices, bp->numverts, GL_TRIANGLE_FAN);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -726,8 +724,7 @@ GL3_DrawSkyBox(void)
|
|||
MakeSkyVec( skymaxs [ 0 ] [ i ], skymaxs [ 1 ] [ i ], i, &skyVertices[2] );
|
||||
MakeSkyVec( skymaxs [ 0 ] [ i ], skymins [ 1 ] [ i ], i, &skyVertices[3] );
|
||||
|
||||
glBufferData(GL_ARRAY_BUFFER, sizeof(skyVertices), skyVertices, GL_STREAM_DRAW);
|
||||
glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
|
||||
GL3_BufferAndDraw3D(skyVertices, 4, GL_TRIANGLE_FAN);
|
||||
}
|
||||
|
||||
// glPopMatrix();
|
||||
|
|
|
@ -106,6 +106,8 @@ typedef struct
|
|||
qboolean debug_output; // is GL_ARB_debug_output supported?
|
||||
qboolean stencil; // Do we have a stencil buffer?
|
||||
|
||||
qboolean useBigVBO; // workaround for AMDs windows driver for fewer calls to glBufferData()
|
||||
|
||||
// ----
|
||||
|
||||
float max_anisotropy;
|
||||
|
@ -225,7 +227,12 @@ typedef struct
|
|||
// NOTE: make sure siParticle is always the last shaderInfo (or adapt GL3_ShutdownShaders())
|
||||
gl3ShaderInfo_t siParticle; // for particles. surprising, right?
|
||||
|
||||
GLuint vao3D, vbo3D; // for brushes etc, using 1 floats as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ)
|
||||
GLuint vao3D, vbo3D; // for brushes etc, using 10 floats and one uint as vertex input (x,y,z, s,t, lms,lmt, normX,normY,normZ ; lightFlags)
|
||||
|
||||
// the next two are for gl3config.useBigVBO == true
|
||||
int vbo3Dsize;
|
||||
int vbo3DcurOffset;
|
||||
|
||||
GLuint vaoAlias, vboAlias, eboAlias; // for models, using 9 floats as (x,y,z, s,t, r,g,b,a)
|
||||
GLuint vaoParticle, vboParticle; // for particles, using 9 floats (x,y,z, size,distance, r,g,b,a)
|
||||
|
||||
|
@ -350,6 +357,8 @@ GL3_BindEBO(GLuint ebo)
|
|||
}
|
||||
}
|
||||
|
||||
extern void GL3_BufferAndDraw3D(const gl3_3D_vtx_t* verts, int numVerts, GLenum drawMode);
|
||||
|
||||
extern qboolean GL3_CullBox(vec3_t mins, vec3_t maxs);
|
||||
extern void GL3_RotateForEntity(entity_t *e);
|
||||
|
||||
|
|
Loading…
Reference in a new issue