From f7105189036de62db1ddef10be818c21c20d18bb Mon Sep 17 00:00:00 2001 From: Christoph Oelckers Date: Mon, 30 Jun 2014 18:10:55 +0200 Subject: [PATCH] - use a uniform array to store vertex data to render dynamic stuff on GL 3.x hardware without the ARB_buffer_storage extension. Due to the way the engine works, it needs to render a lot of small primitives with frequent state changes. However, uploading each primitive's vertices to a buffer separately is not feasible because buffer uploads nearly always stall the GPU. On the other hand, reducing the number of buffer uploads would require saving all the necessary state changes in an array until they can finally be applied, and that approach also imposes an unacceptable overhead. Fortunately, uploading uniform arrays is very fast and doesn't cause GPU stalls, so now the engine puts the vertex data per primitive into a uniform array and uses a static vertex buffer to index the array in the vertex shader. This method offers the same performance as immediate mode but only uses core profile features. 
--- src/gl/data/gl_vertexbuffer.cpp | 60 +++++++++++++++++++++++++++--- src/gl/shaders/gl_shader.cpp | 8 ++++ src/gl/shaders/gl_shader.h | 7 ++++ wadsrc/static/shaders/glsl/main.vp | 28 +++++++++++++- 4 files changed, 95 insertions(+), 8 deletions(-) diff --git a/src/gl/data/gl_vertexbuffer.cpp b/src/gl/data/gl_vertexbuffer.cpp index 3ea401dcd3..b6d33dacdc 100644 --- a/src/gl/data/gl_vertexbuffer.cpp +++ b/src/gl/data/gl_vertexbuffer.cpp @@ -90,6 +90,14 @@ FFlatVertexBuffer::FFlatVertexBuffer() { vbo_shadowdata.Reserve(BUFFER_SIZE); map = &vbo_shadowdata[0]; + + FFlatVertex fill[20]; + for (int i = 0; i < 20; i++) + { + fill[i].Set(0, 0, 0, 100001.f, i); + } + glBindBuffer(GL_ARRAY_BUFFER, vbo_id); + glBufferData(GL_ARRAY_BUFFER, 20 * sizeof(FFlatVertex), fill, GL_STATIC_DRAW); } mIndex = mCurIndex = 0; } @@ -109,15 +117,55 @@ FFlatVertexBuffer::~FFlatVertexBuffer() // //========================================================================== +CVAR(Bool, gl_testbuffer, false, 0) + void FFlatVertexBuffer::ImmRenderBuffer(unsigned int primtype, unsigned int offset, unsigned int count) { - glBegin(primtype); - for (unsigned int i = 0; i < count; i++) + if (!gl_testbuffer) // todo: remove the immediate mode calls once the uniform array method has been tested. 
{ - glTexCoord2fv(&map[offset + i].u); - glVertex3fv(&map[offset + i].x); + glBegin(primtype); + for (unsigned int i = 0; i < count; i++) + { + glTexCoord2fv(&map[offset + i].u); + glVertex3fv(&map[offset + i].x); + } + glEnd(); + } + else + { + if (count > 20) + { + int start = offset; + FFlatVertex ff = map[offset]; + while (count > 20) + { + + if (primtype == GL_TRIANGLE_FAN) + { + // split up the fan into multiple sub-fans + map[offset] = map[start]; + glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, 20 * 5, &map[offset].x); + glDrawArrays(primtype, 0, 20); + offset += 18; + count -= 18; + } + else + { + // we only have triangle fans of this size so don't bother with strips and triangles here. + break; + } + } + map[offset] = map[start]; + glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x); + glDrawArrays(primtype, 0, count); + map[offset] = ff; + } + else + { + glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x); + glDrawArrays(primtype, 0, count); + } } - glEnd(); } //========================================================================== @@ -324,7 +372,7 @@ void FFlatVertexBuffer::CreateVBO() void FFlatVertexBuffer::BindVBO() { - if (gl.flags & RFL_BUFFER_STORAGE) + //if (gl.flags & RFL_BUFFER_STORAGE) { glBindBuffer(GL_ARRAY_BUFFER, vbo_id); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); diff --git a/src/gl/shaders/gl_shader.cpp b/src/gl/shaders/gl_shader.cpp index 76049eb1fd..bd96c1ce28 100644 --- a/src/gl/shaders/gl_shader.cpp +++ b/src/gl/shaders/gl_shader.cpp @@ -90,6 +90,13 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * if (gl.glslversion >= 3.3f) vp_comb = "#version 330 compatibility\n"; // I can't shut up the deprecation warnings in GLSL 1.3 so if available use a version with compatibility profile. 
// todo when using shader storage buffers, add // "#version 400 compatibility\n#extension GL_ARB_shader_storage_buffer_object : require\n" instead. + + if (!(gl.flags & RFL_BUFFER_STORAGE)) + { + // we only want the uniform array hack in the shader if we actually need it. + vp_comb << "#define UNIFORM_VB\n"; + } + vp_comb << defines << i_data.GetString().GetChars(); FString fp_comb = vp_comb; @@ -196,6 +203,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char * timer_index = glGetUniformLocation(hShader, "timer"); lights_index = glGetUniformLocation(hShader, "lights"); + fakevb_index = glGetUniformLocation(hShader, "fakeVB"); glBindAttribLocation(hShader, VATTR_VERTEX2, "aVertex2"); diff --git a/src/gl/shaders/gl_shader.h b/src/gl/shaders/gl_shader.h index d1518088d5..b079e74c46 100644 --- a/src/gl/shaders/gl_shader.h +++ b/src/gl/shaders/gl_shader.h @@ -196,6 +196,9 @@ class FShader int timer_index; int lights_index; +public: + int fakevb_index; +private: int currentglowstate; int currentfixedcolormap; @@ -245,6 +248,10 @@ public: FShader *BindEffect(int effect); void SetActiveShader(FShader *sh); void SetWarpSpeed(unsigned int eff, float speed); + FShader *GetActiveShader() const + { + return mActiveShader; + } FShader *Get(unsigned int eff) { diff --git a/wadsrc/static/shaders/glsl/main.vp b/wadsrc/static/shaders/glsl/main.vp index 3ec5990b26..81b0601c55 100644 --- a/wadsrc/static/shaders/glsl/main.vp +++ b/wadsrc/static/shaders/glsl/main.vp @@ -3,10 +3,34 @@ in vec4 aVertex2; out vec4 pixelpos; out vec2 glowdist; +#ifdef UNIFORM_VB +uniform float fakeVB[100]; +#endif void main() { - vec4 worldcoord = ModelMatrix * mix(gl_Vertex, aVertex2, uInterpolationFactor); + +#ifdef UNIFORM_VB + vec4 vert; + vec4 tc; + + if (gl_MultiTexCoord0.x >= 100000.0) + { + int fakeVI = int(gl_MultiTexCoord0.y)*5; + vert = gl_Vertex + vec4(fakeVB[fakeVI], fakeVB[fakeVI+1], fakeVB[fakeVI+2], 0.0); + tc = vec4(fakeVB[fakeVI+3], fakeVB[fakeVI+4], 0.0, 0.0); 
+ } + else + { + vert = gl_Vertex; + tc = gl_MultiTexCoord0; + } +#else + #define vert gl_Vertex + #define tc gl_MultiTexCoord0 +#endif + + vec4 worldcoord = ModelMatrix * mix(vert, aVertex2, uInterpolationFactor); vec4 eyeCoordPos = ViewMatrix * worldcoord; gl_FrontColor = gl_Color; @@ -25,7 +49,7 @@ void main() vec2 sst = vec2(r.x/m + 0.5, r.y/m + 0.5); gl_TexCoord[0].xy = sst; #else - gl_TexCoord[0] = TextureMatrix * gl_MultiTexCoord0; + gl_TexCoord[0] = TextureMatrix * tc; #endif gl_Position = ProjectionMatrix * eyeCoordPos;