mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2024-11-24 04:41:53 +00:00
- Use a uniform array to store vertex data for rendering dynamic geometry on GL 3.x hardware that lacks the ARB_buffer_storage extension.
Due to the way the engine works, it needs to render a lot of small primitives with frequent state changes. Uploading each primitive's vertices to a buffer separately is not feasible, because buffer uploads nearly always stall the GPU. The alternative, reducing the number of buffer uploads by batching them, would require saving all the necessary state changes in an array until they can finally be applied, and this method also imposed an unacceptable overhead. Fortunately, uploading uniform arrays is very fast and doesn't cause GPU stalls, so the engine now puts the vertex data of each primitive into a uniform array and uses a static vertex buffer to index that array in the vertex shader. This method offers the same performance as immediate mode but uses only core profile features.
This commit is contained in:
parent
6efefd9b7f
commit
f710518903
4 changed files with 95 additions and 8 deletions
|
@ -90,6 +90,14 @@ FFlatVertexBuffer::FFlatVertexBuffer()
|
|||
{
|
||||
vbo_shadowdata.Reserve(BUFFER_SIZE);
|
||||
map = &vbo_shadowdata[0];
|
||||
|
||||
FFlatVertex fill[20];
|
||||
for (int i = 0; i < 20; i++)
|
||||
{
|
||||
fill[i].Set(0, 0, 0, 100001.f, i);
|
||||
}
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
|
||||
glBufferData(GL_ARRAY_BUFFER, 20 * sizeof(FFlatVertex), fill, GL_STATIC_DRAW);
|
||||
}
|
||||
mIndex = mCurIndex = 0;
|
||||
}
|
||||
|
@ -109,15 +117,55 @@ FFlatVertexBuffer::~FFlatVertexBuffer()
|
|||
//
|
||||
//==========================================================================
|
||||
|
||||
CVAR(Bool, gl_testbuffer, false, 0)
|
||||
|
||||
void FFlatVertexBuffer::ImmRenderBuffer(unsigned int primtype, unsigned int offset, unsigned int count)
|
||||
{
|
||||
glBegin(primtype);
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
if (!gl_testbuffer) // todo: remove the immediate mode calls once the uniform array method has been tested.
|
||||
{
|
||||
glTexCoord2fv(&map[offset + i].u);
|
||||
glVertex3fv(&map[offset + i].x);
|
||||
glBegin(primtype);
|
||||
for (unsigned int i = 0; i < count; i++)
|
||||
{
|
||||
glTexCoord2fv(&map[offset + i].u);
|
||||
glVertex3fv(&map[offset + i].x);
|
||||
}
|
||||
glEnd();
|
||||
}
|
||||
else
|
||||
{
|
||||
if (count > 20)
|
||||
{
|
||||
int start = offset;
|
||||
FFlatVertex ff = map[offset];
|
||||
while (count > 20)
|
||||
{
|
||||
|
||||
if (primtype == GL_TRIANGLE_FAN)
|
||||
{
|
||||
// split up the fan into multiple sub-fans
|
||||
map[offset] = map[start];
|
||||
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, 20 * 5, &map[offset].x);
|
||||
glDrawArrays(primtype, 0, 20);
|
||||
offset += 18;
|
||||
count -= 18;
|
||||
}
|
||||
else
|
||||
{
|
||||
// we only have triangle fans of this size so don't bother with strips and triangles here.
|
||||
break;
|
||||
}
|
||||
}
|
||||
map[offset] = map[start];
|
||||
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
|
||||
glDrawArrays(primtype, 0, count);
|
||||
map[offset] = ff;
|
||||
}
|
||||
else
|
||||
{
|
||||
glUniform1fv(GLRenderer->mShaderManager->GetActiveShader()->fakevb_index, count * 5, &map[offset].x);
|
||||
glDrawArrays(primtype, 0, count);
|
||||
}
|
||||
}
|
||||
glEnd();
|
||||
}
|
||||
|
||||
//==========================================================================
|
||||
|
@ -324,7 +372,7 @@ void FFlatVertexBuffer::CreateVBO()
|
|||
|
||||
void FFlatVertexBuffer::BindVBO()
|
||||
{
|
||||
if (gl.flags & RFL_BUFFER_STORAGE)
|
||||
//if (gl.flags & RFL_BUFFER_STORAGE)
|
||||
{
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo_id);
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
|
||||
|
|
|
@ -90,6 +90,13 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
|
|||
if (gl.glslversion >= 3.3f) vp_comb = "#version 330 compatibility\n"; // I can't shut up the deprecation warnings in GLSL 1.3 so if available use a version with compatibility profile.
|
||||
// todo when using shader storage buffers, add
|
||||
// "#version 400 compatibility\n#extension GL_ARB_shader_storage_buffer_object : require\n" instead.
|
||||
|
||||
if (!(gl.flags & RFL_BUFFER_STORAGE))
|
||||
{
|
||||
// we only want the uniform array hack in the shader if we actually need it.
|
||||
vp_comb << "#define UNIFORM_VB\n";
|
||||
}
|
||||
|
||||
vp_comb << defines << i_data.GetString().GetChars();
|
||||
FString fp_comb = vp_comb;
|
||||
|
||||
|
@ -196,6 +203,7 @@ bool FShader::Load(const char * name, const char * vert_prog_lump, const char *
|
|||
|
||||
timer_index = glGetUniformLocation(hShader, "timer");
|
||||
lights_index = glGetUniformLocation(hShader, "lights");
|
||||
fakevb_index = glGetUniformLocation(hShader, "fakeVB");
|
||||
|
||||
glBindAttribLocation(hShader, VATTR_VERTEX2, "aVertex2");
|
||||
|
||||
|
|
|
@ -196,6 +196,9 @@ class FShader
|
|||
|
||||
int timer_index;
|
||||
int lights_index;
|
||||
public:
|
||||
int fakevb_index;
|
||||
private:
|
||||
int currentglowstate;
|
||||
int currentfixedcolormap;
|
||||
|
||||
|
@ -245,6 +248,10 @@ public:
|
|||
FShader *BindEffect(int effect);
|
||||
void SetActiveShader(FShader *sh);
|
||||
void SetWarpSpeed(unsigned int eff, float speed);
|
||||
FShader *GetActiveShader() const
|
||||
{
|
||||
return mActiveShader;
|
||||
}
|
||||
|
||||
FShader *Get(unsigned int eff)
|
||||
{
|
||||
|
|
|
@ -3,10 +3,34 @@ in vec4 aVertex2;
|
|||
out vec4 pixelpos;
|
||||
out vec2 glowdist;
|
||||
|
||||
#ifdef UNIFORM_VB
|
||||
uniform float fakeVB[100];
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
vec4 worldcoord = ModelMatrix * mix(gl_Vertex, aVertex2, uInterpolationFactor);
|
||||
|
||||
#ifdef UNIFORM_VB
|
||||
vec4 vert;
|
||||
vec4 tc;
|
||||
|
||||
if (gl_MultiTexCoord0.x >= 100000.0)
|
||||
{
|
||||
int fakeVI = int(gl_MultiTexCoord0.y)*5;
|
||||
vert = gl_Vertex + vec4(fakeVB[fakeVI], fakeVB[fakeVI+1], fakeVB[fakeVI+2], 0.0);
|
||||
tc = vec4(fakeVB[fakeVI+3], fakeVB[fakeVI+4], 0.0, 0.0);
|
||||
}
|
||||
else
|
||||
{
|
||||
vert = gl_Vertex;
|
||||
tc = gl_MultiTexCoord0;
|
||||
}
|
||||
#else
|
||||
#define vert gl_Vertex
|
||||
#define tc gl_MultiTexCoord0
|
||||
#endif
|
||||
|
||||
vec4 worldcoord = ModelMatrix * mix(vert, aVertex2, uInterpolationFactor);
|
||||
vec4 eyeCoordPos = ViewMatrix * worldcoord;
|
||||
|
||||
gl_FrontColor = gl_Color;
|
||||
|
@ -25,7 +49,7 @@ void main()
|
|||
vec2 sst = vec2(r.x/m + 0.5, r.y/m + 0.5);
|
||||
gl_TexCoord[0].xy = sst;
|
||||
#else
|
||||
gl_TexCoord[0] = TextureMatrix * gl_MultiTexCoord0;
|
||||
gl_TexCoord[0] = TextureMatrix * tc;
|
||||
#endif
|
||||
|
||||
gl_Position = ProjectionMatrix * eyeCoordPos;
|
||||
|
|
Loading…
Reference in a new issue