- use a uniform array to store vertex data to render dynamic stuff on GL 3.x hardware without the ARB_buffer_storage extension.

Due to the way the engine works it needs to render a lot of small primitives with frequent state changes.
But due to the poor performance of buffer uploads it is impossible to upload each primitive's vertices to a buffer separately, because such uploads nearly always stall the GPU.
On the other hand, in order to reduce the number of buffer uploads, all the necessary state changes would have to be saved in an array until they can finally be used. This method also imposed an unacceptable overhead.
Fortunately, uploading uniform arrays is very fast and doesn't cause GPU stalls, so now the engine puts the vertex data per primitive into a uniform array and uses a static vertex buffer to index the array in the vertex shader.
This method offers the same performance as immediate mode but only uses core profile features.
This commit is contained in:
Christoph Oelckers 2014-06-30 18:10:55 +02:00
parent 6efefd9b7f
commit f710518903
4 changed files with 95 additions and 8 deletions

View file

@ -90,6 +90,14 @@ FFlatVertexBuffer::FFlatVertexBuffer()
{
vbo_shadowdata.Reserve(BUFFER_SIZE);
map = &vbo_shadowdata[0];
FFlatVertex fill[20];
for (int i = 0; i < 20; i++)
{
fill[i].Set(0, 0, 0, 100001.f, i);
}
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBufferData(GL_ARRAY_BUFFER, 20 * sizeof(FFlatVertex), fill, GL_STATIC_DRAW);
}
mIndex = mCurIndex = 0;
}
@ -109,7 +117,11 @@ FFlatVertexBuffer::~FFlatVertexBuffer()
//
//==========================================================================
CVAR(Bool, gl_testbuffer, false, 0)
void FFlatVertexBuffer::ImmRenderBuffer(unsigned int primtype, unsigned int offset, unsigned int count)
{
if (!gl_testbuffer) // todo: remove the immediate mode calls once the uniform array method has been tested.
{
glBegin(primtype);
for (unsigned int i = 0; i < count; i++)
@ -119,6 +131,42 @@ void FFlatVertexBuffer::ImmRenderBuffer(unsigned int primtype, unsigned int offs
}
glEnd();
}
else
{
if (count > 20)
{
int start = offset;
FFlatVertex ff = map[offset];
while (count > 20)
{
if (primtype == GL_TRIANGLE_FAN)
{
// split up the fan into multiple sub-fans
map[offset] = map[start];
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, 20 * 5, &map[offset].x);
glDrawArrays(primtype, 0, 20);
offset += 18;
count -= 18;
}
else
{
// we only have triangle fans of this size so don't bother with strips and triangles here.
break;
}
}
map[offset] = map[start];
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
glDrawArrays(primtype, 0, count);
map[offset] = ff;
}
else
{
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
glDrawArrays(primtype, 0, count);
}
}
}
//==========================================================================
//
@ -324,7 +372,7 @@ void FFlatVertexBuffer::CreateVBO()
void FFlatVertexBuffer::BindVBO()
{
if (gl.flags & RFL_BUFFER_STORAGE)
//if (gl.flags & RFL_BUFFER_STORAGE)
{
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);

View file

@ -90,6 +90,13 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
if (gl.glslversion >= 3.3f) vp_comb = "#version 330 compatibility\n"; // I can't shut up the deprecation warnings in GLSL 1.3 so if available use a version with compatibility profile.
// todo when using shader storage buffers, add
// "#version 400 compatibility\n#extension GL_ARB_shader_storage_buffer_object : require\n" instead.
if (!(gl.flags & RFL_BUFFER_STORAGE))
{
// we only want the uniform array hack in the shader if we actually need it.
vp_comb << "#define UNIFORM_VB\n";
}
vp_comb << defines << i_data.GetString().GetChars();
FString fp_comb = vp_comb;
@ -196,6 +203,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
timer_index = glGetUniformLocation(hShader, "timer");
lights_index = glGetUniformLocation(hShader, "lights");
fakevb_index = glGetUniformLocation(hShader, "fakeVB");
glBindAttribLocation(hShader, VATTR_VERTEX2, "aVertex2");

View file

@ -196,6 +196,9 @@ class FShader
int timer_index;
int lights_index;
public:
int fakevb_index;
private:
int currentglowstate;
int currentfixedcolormap;
@ -245,6 +248,10 @@ public:
FShader *BindEffect(int effect);
void SetActiveShader(FShader *sh);
void SetWarpSpeed(unsigned int eff, float speed);
// Read-only accessor: hands back the pointer stored in mActiveShader without modifying any state.
FShader *GetActiveShader() const { return mActiveShader; }
FShader *Get(unsigned int eff)
{

View file

@ -3,10 +3,34 @@ in vec4 aVertex2;
out vec4 pixelpos;
out vec2 glowdist;
#ifdef UNIFORM_VB
uniform float fakeVB[100];
#endif
void main()
{
vec4 worldcoord = ModelMatrix * mix(gl_Vertex, aVertex2, uInterpolationFactor);
#ifdef UNIFORM_VB
vec4 vert;
vec4 tc;
if (gl_MultiTexCoord0.x >= 100000.0)
{
int fakeVI = int(gl_MultiTexCoord0.y)*5;
vert = gl_Vertex + vec4(fakeVB[fakeVI], fakeVB[fakeVI+1], fakeVB[fakeVI+2], 0.0);
tc = vec4(fakeVB[fakeVI+3], fakeVB[fakeVI+4], 0.0, 0.0);
}
else
{
vert = gl_Vertex;
tc = gl_MultiTexCoord0;
}
#else
#define vert gl_Vertex
#define tc gl_MultiTexCoord0
#endif
vec4 worldcoord = ModelMatrix * mix(vert, aVertex2, uInterpolationFactor);
vec4 eyeCoordPos = ViewMatrix * worldcoord;
gl_FrontColor = gl_Color;
@ -25,7 +49,7 @@ void main()
vec2 sst = vec2(r.x/m + 0.5, r.y/m + 0.5);
gl_TexCoord[0].xy = sst;
#else
gl_TexCoord[0] = TextureMatrix * gl_MultiTexCoord0;
gl_TexCoord[0] = TextureMatrix * tc;
#endif
gl_Position = ProjectionMatrix * eyeCoordPos;