mirror of
https://github.com/ZDoom/gzdoom-gles.git
synced 2024-11-11 15:22:15 +00:00
Prepared light buffer for multithreaded use
This necessitated removing the reallocator because that cannot be done in a multithreaded context in OpenGL. The buffer should be large enough anyway; if it all gets used, slowdowns would be unavoidable. There was also some simplification of the buffer alignment math for uniform buffers.
This commit is contained in:
parent
84a55667d9
commit
ad80efd6be
2 changed files with 43 additions and 83 deletions
|
@ -32,13 +32,17 @@
|
||||||
#include "hwrenderer/dynlights/hw_dynlightdata.h"
|
#include "hwrenderer/dynlights/hw_dynlightdata.h"
|
||||||
#include "hwrenderer/data/shaderuniforms.h"
|
#include "hwrenderer/data/shaderuniforms.h"
|
||||||
|
|
||||||
static const int INITIAL_BUFFER_SIZE = 160000; // This means 80000 lights per frame and 160000*16 bytes == 2.56 MB.
|
static const int ELEMENTS_PER_LIGHT = 4; // each light needs 4 vec4's.
|
||||||
|
static const int ELEMENT_SIZE = (4*sizeof(float));
|
||||||
|
|
||||||
|
|
||||||
FLightBuffer::FLightBuffer()
|
FLightBuffer::FLightBuffer()
|
||||||
{
|
{
|
||||||
|
int maxNumberOfLights = gl.lightmethod == LM_DIRECT? 80000 : 40000;
|
||||||
|
|
||||||
|
mBufferSize = maxNumberOfLights * ELEMENTS_PER_LIGHT;
|
||||||
|
mByteSize = mBufferSize * ELEMENT_SIZE;
|
||||||
|
|
||||||
mBufferSize = INITIAL_BUFFER_SIZE;
|
|
||||||
mByteSize = mBufferSize * sizeof(float);
|
|
||||||
// Hack alert: On Intel's GL driver SSBO's perform quite worse than UBOs.
|
// Hack alert: On Intel's GL driver SSBO's perform quite worse than UBOs.
|
||||||
// We only want to disable using SSBOs for lights but not disable the feature entirely.
|
// We only want to disable using SSBOs for lights but not disable the feature entirely.
|
||||||
// Note that using an uniform buffer here will limit the number of lights per surface so it isn't done for NVidia and AMD.
|
// Note that using an uniform buffer here will limit the number of lights per surface so it isn't done for NVidia and AMD.
|
||||||
|
@ -46,15 +50,16 @@ FLightBuffer::FLightBuffer()
|
||||||
{
|
{
|
||||||
mBufferType = GL_SHADER_STORAGE_BUFFER;
|
mBufferType = GL_SHADER_STORAGE_BUFFER;
|
||||||
mBlockAlign = 0;
|
mBlockAlign = 0;
|
||||||
mBlockSize = mBufferSize;
|
mBlockSize = mBufferSize / ELEMENT_SIZE;
|
||||||
|
mMaxUploadSize = mBlockSize;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
mBufferType = GL_UNIFORM_BUFFER;
|
mBufferType = GL_UNIFORM_BUFFER;
|
||||||
mBlockSize = gl.maxuniformblock / 16;
|
mBlockSize = gl.maxuniformblock / ELEMENT_SIZE;
|
||||||
if (mBlockSize > 2048) mBlockSize = 2048; // we don't really need a larger buffer
|
mBlockAlign = gl.uniformblockalignment / ELEMENT_SIZE;
|
||||||
|
mMaxUploadSize = (mBlockSize - mBlockAlign);
|
||||||
mBlockAlign = mBlockSize / 2;
|
mByteSize += gl.maxuniformblock; // to avoid mapping beyond the end of the buffer.
|
||||||
}
|
}
|
||||||
|
|
||||||
glGenBuffers(1, &mBufferId);
|
glGenBuffers(1, &mBufferId);
|
||||||
|
@ -84,25 +89,19 @@ FLightBuffer::~FLightBuffer()
|
||||||
void FLightBuffer::Clear()
|
void FLightBuffer::Clear()
|
||||||
{
|
{
|
||||||
mIndex = 0;
|
mIndex = 0;
|
||||||
mUploadIndex = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int FLightBuffer::UploadLights(FDynLightData &data)
|
int FLightBuffer::UploadLights(FDynLightData &data)
|
||||||
{
|
{
|
||||||
|
// All measurements here are in vec4's.
|
||||||
int size0 = data.arrays[0].Size()/4;
|
int size0 = data.arrays[0].Size()/4;
|
||||||
int size1 = data.arrays[1].Size()/4;
|
int size1 = data.arrays[1].Size()/4;
|
||||||
int size2 = data.arrays[2].Size()/4;
|
int size2 = data.arrays[2].Size()/4;
|
||||||
int totalsize = size0 + size1 + size2 + 1;
|
int totalsize = size0 + size1 + size2 + 1;
|
||||||
|
|
||||||
// pointless type casting because some compilers can't print enough warnings.
|
if (totalsize > (int)mMaxUploadSize)
|
||||||
if (mBlockAlign > 0 && (unsigned int)totalsize + (mIndex % mBlockAlign) > mBlockSize)
|
|
||||||
{
|
{
|
||||||
mIndex = ((mIndex + mBlockAlign) / mBlockAlign) * mBlockAlign;
|
int diff = totalsize - (int)mMaxUploadSize;
|
||||||
|
|
||||||
// can't be rendered all at once.
|
|
||||||
if ((unsigned int)totalsize > mBlockSize)
|
|
||||||
{
|
|
||||||
int diff = totalsize - (int)mBlockSize;
|
|
||||||
|
|
||||||
size2 -= diff;
|
size2 -= diff;
|
||||||
if (size2 < 0)
|
if (size2 < 0)
|
||||||
|
@ -117,63 +116,23 @@ int FLightBuffer::UploadLights(FDynLightData &data)
|
||||||
}
|
}
|
||||||
totalsize = size0 + size1 + size2 + 1;
|
totalsize = size0 + size1 + size2 + 1;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (totalsize <= 1) return -1;
|
assert(mBufferPointer != nullptr);
|
||||||
|
if (mBufferPointer == nullptr) return -1;
|
||||||
|
|
||||||
if (mIndex + totalsize > mBufferSize/4)
|
if (totalsize <= 4 || mIndex + totalsize > mBufferSize) return -1;
|
||||||
{
|
auto thisindex = mIndex.fetch_add(totalsize);
|
||||||
// reallocate the buffer with twice the size
|
if (thisindex + totalsize > mBufferSize) return -1; // must retest because another thread might have changed mIndex.
|
||||||
unsigned int newbuffer;
|
|
||||||
|
|
||||||
// first unmap the old buffer
|
float *copyptr = mBufferPointer + thisindex*4;
|
||||||
glBindBuffer(mBufferType, mBufferId);
|
|
||||||
glUnmapBuffer(mBufferType);
|
|
||||||
|
|
||||||
// create and bind the new buffer, bind the old one to a copy target (too bad that DSA is not yet supported well enough to omit this crap.)
|
|
||||||
glGenBuffers(1, &newbuffer);
|
|
||||||
glBindBufferBase(mBufferType, LIGHTBUF_BINDINGPOINT, newbuffer);
|
|
||||||
glBindBuffer(mBufferType, newbuffer); // Note: Some older AMD drivers don't do that in glBindBufferBase, as they should.
|
|
||||||
glBindBuffer(GL_COPY_READ_BUFFER, mBufferId);
|
|
||||||
|
|
||||||
// create the new buffer's storage (twice as large as the old one)
|
|
||||||
mBufferSize *= 2;
|
|
||||||
mByteSize *= 2;
|
|
||||||
if (gl.lightmethod == LM_DIRECT)
|
|
||||||
{
|
|
||||||
glBufferStorage(mBufferType, mByteSize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
|
||||||
mBufferPointer = (float*)glMapBufferRange(mBufferType, 0, mByteSize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
glBufferData(mBufferType, mByteSize, NULL, GL_DYNAMIC_DRAW);
|
|
||||||
mBufferPointer = (float*)glMapBufferRange(mBufferType, 0, mByteSize, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_BUFFER_BIT);
|
|
||||||
}
|
|
||||||
|
|
||||||
// copy contents and delete the old buffer.
|
|
||||||
glCopyBufferSubData(GL_COPY_READ_BUFFER, mBufferType, 0, 0, mByteSize/2);
|
|
||||||
glBindBuffer(GL_COPY_READ_BUFFER, 0);
|
|
||||||
glDeleteBuffers(1, &mBufferId);
|
|
||||||
mBufferId = newbuffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
float *copyptr;
|
|
||||||
|
|
||||||
assert(mBufferPointer != NULL);
|
|
||||||
if (mBufferPointer == NULL) return -1;
|
|
||||||
copyptr = mBufferPointer + mIndex * 4;
|
|
||||||
|
|
||||||
float parmcnt[] = { 0, float(size0), float(size0 + size1), float(size0 + size1 + size2) };
|
float parmcnt[] = { 0, float(size0), float(size0 + size1), float(size0 + size1 + size2) };
|
||||||
|
memcpy(&copyptr[0], parmcnt, ELEMENT_SIZE);
|
||||||
|
memcpy(&copyptr[4], &data.arrays[0][0], size0 * ELEMENT_SIZE);
|
||||||
|
memcpy(&copyptr[4 + 4*size0], &data.arrays[1][0], size1 * ELEMENT_SIZE);
|
||||||
|
memcpy(&copyptr[4 + 4*(size0 + size1)], &data.arrays[2][0], size2 * ELEMENT_SIZE);
|
||||||
|
|
||||||
memcpy(&copyptr[0], parmcnt, 4 * sizeof(float));
|
return thisindex;
|
||||||
memcpy(&copyptr[4], &data.arrays[0][0], 4 * size0*sizeof(float));
|
|
||||||
memcpy(&copyptr[4 + 4*size0], &data.arrays[1][0], 4 * size1*sizeof(float));
|
|
||||||
memcpy(&copyptr[4 + 4*(size0 + size1)], &data.arrays[2][0], 4 * size2*sizeof(float));
|
|
||||||
|
|
||||||
unsigned int bufferindex = mIndex;
|
|
||||||
mIndex += totalsize;
|
|
||||||
draw_dlight += (totalsize-1) / 2;
|
|
||||||
return bufferindex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void FLightBuffer::Begin()
|
void FLightBuffer::Begin()
|
||||||
|
@ -203,7 +162,7 @@ int FLightBuffer::BindUBO(unsigned int index)
|
||||||
{
|
{
|
||||||
// this will only get called if a uniform buffer is used. For a shader storage buffer we only need to bind the buffer once at the start to all shader programs
|
// this will only get called if a uniform buffer is used. For a shader storage buffer we only need to bind the buffer once at the start to all shader programs
|
||||||
mLastMappedIndex = offset;
|
mLastMappedIndex = offset;
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, LIGHTBUF_BINDINGPOINT, mBufferId, offset*16, mBlockSize*16); // we go from counting vec4's to counting bytes here.
|
glBindBufferRange(GL_UNIFORM_BUFFER, LIGHTBUF_BINDINGPOINT, mBufferId, offset * ELEMENT_SIZE, mBlockSize * ELEMENT_SIZE);
|
||||||
}
|
}
|
||||||
return (index - offset);
|
return (index - offset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#include "tarray.h"
|
#include "tarray.h"
|
||||||
#include "hwrenderer/dynlights/hw_dynlightdata.h"
|
#include "hwrenderer/dynlights/hw_dynlightdata.h"
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
class FLightBuffer
|
class FLightBuffer
|
||||||
{
|
{
|
||||||
|
@ -10,13 +11,13 @@ class FLightBuffer
|
||||||
float * mBufferPointer;
|
float * mBufferPointer;
|
||||||
|
|
||||||
unsigned int mBufferType;
|
unsigned int mBufferType;
|
||||||
unsigned int mIndex;
|
std::atomic<unsigned int> mIndex;
|
||||||
unsigned int mUploadIndex;
|
|
||||||
unsigned int mLastMappedIndex;
|
unsigned int mLastMappedIndex;
|
||||||
unsigned int mBlockAlign;
|
unsigned int mBlockAlign;
|
||||||
unsigned int mBlockSize;
|
unsigned int mBlockSize;
|
||||||
unsigned int mBufferSize;
|
unsigned int mBufferSize;
|
||||||
unsigned int mByteSize;
|
unsigned int mByteSize;
|
||||||
|
unsigned int mMaxUploadSize;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue