diff --git a/src/gl/dynlights/gl_lightbuffer.cpp b/src/gl/dynlights/gl_lightbuffer.cpp index 145a90327..0a9891231 100644 --- a/src/gl/dynlights/gl_lightbuffer.cpp +++ b/src/gl/dynlights/gl_lightbuffer.cpp @@ -32,13 +32,17 @@ #include "hwrenderer/dynlights/hw_dynlightdata.h" #include "hwrenderer/data/shaderuniforms.h" -static const int INITIAL_BUFFER_SIZE = 160000; // This means 80000 lights per frame and 160000*16 bytes == 2.56 MB. +static const int ELEMENTS_PER_LIGHT = 4; // each light needs 4 vec4's. +static const int ELEMENT_SIZE = (4*sizeof(float)); + FLightBuffer::FLightBuffer() { - - mBufferSize = INITIAL_BUFFER_SIZE; - mByteSize = mBufferSize * sizeof(float); + int maxNumberOfLights = gl.lightmethod == LM_DIRECT? 80000 : 40000; + + mBufferSize = maxNumberOfLights * ELEMENTS_PER_LIGHT; + mByteSize = mBufferSize * ELEMENT_SIZE; + // Hack alert: On Intel's GL driver SSBO's perform quite worse than UBOs. // We only want to disable using SSBOs for lights but not disable the feature entirely. // Note that using an uniform buffer here will limit the number of lights per surface so it isn't done for NVidia and AMD. @@ -46,15 +50,16 @@ FLightBuffer::FLightBuffer() { mBufferType = GL_SHADER_STORAGE_BUFFER; mBlockAlign = 0; - mBlockSize = mBufferSize; + mBlockSize = mBufferSize / ELEMENT_SIZE; + mMaxUploadSize = mBlockSize; } else { mBufferType = GL_UNIFORM_BUFFER; - mBlockSize = gl.maxuniformblock / 16; - if (mBlockSize > 2048) mBlockSize = 2048; // we don't really need a larger buffer - - mBlockAlign = mBlockSize / 2; + mBlockSize = gl.maxuniformblock / ELEMENT_SIZE; + mBlockAlign = gl.uniformblockalignment / ELEMENT_SIZE; + mMaxUploadSize = (mBlockSize - mBlockAlign); + mByteSize += gl.maxuniformblock; // to avoid mapping beyond the end of the buffer. } glGenBuffers(1, &mBufferId); @@ -84,96 +89,50 @@ FLightBuffer::~FLightBuffer() void FLightBuffer::Clear() { mIndex = 0; - mUploadIndex = 0; } int FLightBuffer::UploadLights(FDynLightData &data) { + // All meaasurements here are in vec4's. int size0 = data.arrays[0].Size()/4; int size1 = data.arrays[1].Size()/4; int size2 = data.arrays[2].Size()/4; int totalsize = size0 + size1 + size2 + 1; - // pointless type casting because some compilers can't print enough warnings. - if (mBlockAlign > 0 && (unsigned int)totalsize + (mIndex % mBlockAlign) > mBlockSize) + if (totalsize > (int)mMaxUploadSize) { - mIndex = ((mIndex + mBlockAlign) / mBlockAlign) * mBlockAlign; - - // can't be rendered all at once. - if ((unsigned int)totalsize > mBlockSize) + int diff = totalsize - (int)mMaxUploadSize; + + size2 -= diff; + if (size2 < 0) { - int diff = totalsize - (int)mBlockSize; - - size2 -= diff; - if (size2 < 0) - { - size1 += size2; - size2 = 0; - } - if (size1 < 0) - { - size0 += size1; - size1 = 0; - } - totalsize = size0 + size1 + size2 + 1; + size1 += size2; + size2 = 0; } + if (size1 < 0) + { + size0 += size1; + size1 = 0; + } + totalsize = size0 + size1 + size2 + 1; } - if (totalsize <= 1) return -1; + assert(mBufferPointer != nullptr); + if (mBufferPointer == nullptr) return -1; - if (mIndex + totalsize > mBufferSize/4) - { - // reallocate the buffer with twice the size - unsigned int newbuffer; + if (totalsize <= 4 || mIndex + totalsize > mBufferSize) return -1; + auto thisindex = mIndex.fetch_add(totalsize); + if (thisindex + totalsize > mBufferSize) return -1; // must retest because another thread might have changed mIndex. - // first unmap the old buffer - glBindBuffer(mBufferType, mBufferId); - glUnmapBuffer(mBufferType); - - // create and bind the new buffer, bind the old one to a copy target (too bad that DSA is not yet supported well enough to omit this crap.) - glGenBuffers(1, &newbuffer); - glBindBufferBase(mBufferType, LIGHTBUF_BINDINGPOINT, newbuffer); - glBindBuffer(mBufferType, newbuffer); // Note: Some older AMD drivers don't do that in glBindBufferBase, as they should. - glBindBuffer(GL_COPY_READ_BUFFER, mBufferId); - - // create the new buffer's storage (twice as large as the old one) - mBufferSize *= 2; - mByteSize *= 2; - if (gl.lightmethod == LM_DIRECT) - { - glBufferStorage(mBufferType, mByteSize, NULL, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); - mBufferPointer = (float*)glMapBufferRange(mBufferType, 0, mByteSize, GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT); - } - else - { - glBufferData(mBufferType, mByteSize, NULL, GL_DYNAMIC_DRAW); - mBufferPointer = (float*)glMapBufferRange(mBufferType, 0, mByteSize, GL_MAP_WRITE_BIT|GL_MAP_INVALIDATE_BUFFER_BIT); - } - - // copy contents and delete the old buffer. - glCopyBufferSubData(GL_COPY_READ_BUFFER, mBufferType, 0, 0, mByteSize/2); - glBindBuffer(GL_COPY_READ_BUFFER, 0); - glDeleteBuffers(1, &mBufferId); - mBufferId = newbuffer; - } - - float *copyptr; - - assert(mBufferPointer != NULL); - if (mBufferPointer == NULL) return -1; - copyptr = mBufferPointer + mIndex * 4; + float *copyptr = mBufferPointer + thisindex*4; float parmcnt[] = { 0, float(size0), float(size0 + size1), float(size0 + size1 + size2) }; + memcpy(©ptr[0], parmcnt, ELEMENT_SIZE); + memcpy(©ptr[4], &data.arrays[0][0], size0 * ELEMENT_SIZE); + memcpy(©ptr[4 + 4*size0], &data.arrays[1][0], size1 * ELEMENT_SIZE); + memcpy(©ptr[4 + 4*(size0 + size1)], &data.arrays[2][0], size2 * ELEMENT_SIZE); - memcpy(©ptr[0], parmcnt, 4 * sizeof(float)); - memcpy(©ptr[4], &data.arrays[0][0], 4 * size0*sizeof(float)); - memcpy(©ptr[4 + 4*size0], &data.arrays[1][0], 4 * size1*sizeof(float)); - memcpy(©ptr[4 + 4*(size0 + size1)], &data.arrays[2][0], 4 * size2*sizeof(float)); - - unsigned int bufferindex = mIndex; - mIndex += totalsize; - draw_dlight += (totalsize-1) / 2; - return bufferindex; + return thisindex; } void FLightBuffer::Begin() @@ -203,7 +162,7 @@ int FLightBuffer::BindUBO(unsigned int index) { // this will only get called if a uniform buffer is used. For a shader storage buffer we only need to bind the buffer once at the start to all shader programs mLastMappedIndex = offset; - glBindBufferRange(GL_UNIFORM_BUFFER, LIGHTBUF_BINDINGPOINT, mBufferId, offset*16, mBlockSize*16); // we go from counting vec4's to counting bytes here. + glBindBufferRange(GL_UNIFORM_BUFFER, LIGHTBUF_BINDINGPOINT, mBufferId, offset * ELEMENT_SIZE, mBlockSize * ELEMENT_SIZE); } return (index - offset); } diff --git a/src/gl/dynlights/gl_lightbuffer.h b/src/gl/dynlights/gl_lightbuffer.h index be2026d09..95b1b6499 100644 --- a/src/gl/dynlights/gl_lightbuffer.h +++ b/src/gl/dynlights/gl_lightbuffer.h @@ -3,6 +3,7 @@ #include "tarray.h" #include "hwrenderer/dynlights/hw_dynlightdata.h" +#include class FLightBuffer { @@ -10,13 +11,13 @@ class FLightBuffer float * mBufferPointer; unsigned int mBufferType; - unsigned int mIndex; - unsigned int mUploadIndex; + std::atomic mIndex; unsigned int mLastMappedIndex; unsigned int mBlockAlign; unsigned int mBlockSize; unsigned int mBufferSize; unsigned int mByteSize; + unsigned int mMaxUploadSize; public: