- simplified the render job interface.

Since the job nodes were already taken from a static array, the added linked list isn't really needed. All we need is a read and a write pointer into the array. This can even be done without a spinlock as long as we assume that the list never overflows.
This commit is contained in:
Christoph Oelckers 2018-10-31 09:49:07 +01:00
parent e9c2247ff4
commit 01a0af8ad1
3 changed files with 46 additions and 122 deletions

View file

@ -31,6 +31,7 @@
#include "g_levellocals.h" #include "g_levellocals.h"
#include "p_effect.h" #include "p_effect.h"
#include "po_man.h" #include "po_man.h"
#include "m_fixed.h"
#include "ctpl.h" #include "ctpl.h"
#include "hwrenderer/scene/hw_fakeflat.h" #include "hwrenderer/scene/hw_fakeflat.h"
#include "hwrenderer/scene/hw_clipper.h" #include "hwrenderer/scene/hw_clipper.h"
@ -42,18 +43,9 @@
thread_local bool isWorkerThread; thread_local bool isWorkerThread;
ctpl::thread_pool renderPool; ctpl::thread_pool renderPool(1);
bool inited = false; bool inited = false;
void InitRenderPool()
{
if (!inited)
{
inited = true;
renderPool.resize(1); // we only need one worker.
}
}
struct RenderJob struct RenderJob
{ {
enum enum
@ -68,112 +60,39 @@ struct RenderJob
int type; int type;
subsector_t *sub; subsector_t *sub;
seg_t *seg; seg_t *seg;
RenderJob *Next;
};
// Used for a few things where the overhead of a full-blown mutex would be too costly.
// Code taken from http://www.modernescpp.com/index.php/the-atomic-flag
class Spinlock
{
std::atomic_bool flag = false;
public:
void lock()
{
do
{
while (flag.load(std::memory_order_relaxed))
{
_mm_pause();
}
} while (flag.exchange(true, std::memory_order_acquire));
}
void unlock()
{
flag.store(false, std::memory_order_release);
}
};
template<class T> class TRenderList
{
T *mHead = nullptr;
T *mTail = nullptr;
Spinlock mLock;
public:
// Since we do not own the elements we will not free them.
void Clear()
{
mHead = nullptr;
mTail = nullptr;
}
T* Head() const
{
return mHead;
}
void AddTail(T *element)
{
mLock.lock();
if (mHead == nullptr) mHead = element;
if (mTail != nullptr) mTail->Next = element;
mTail = element;
element->Next = nullptr;
mLock.unlock();
}
void AddHead(T *element)
{
mLock.lock();
element->Next = mHead;
mHead = element;
mLock.unlock();
}
T* GetHead()
{
if (mHead == nullptr) return nullptr; // handle an empty list without thrashing the spinlock.
mLock.lock();
auto val = mHead;
if (val != nullptr) mHead = val->Next;
mLock.unlock();
return val;
}
}; };
class RenderJobQueue class RenderJobQueue
{ {
RenderJob pool[200000]; // Way more than ever needed. RenderJob pool[300000]; // Way more than ever needed. The largest ever seen on a single viewpoint is around 40000.
int poolindex = 0; std::atomic<int> readindex = 0;
TRenderList<RenderJob> jobList; std::atomic<int> writeindex = 0;
public: public:
void AddJob(int type, subsector_t *sub, seg_t *seg = nullptr) void AddJob(int type, subsector_t *sub, seg_t *seg = nullptr)
{ {
RenderJob *job = &pool[poolindex++]; // This does not check for array overflows. The pool should be large enough that it never hits the limit.
*job = { type, sub, seg, nullptr };
jobList.AddTail(job); pool[writeindex] = { type, sub, seg };
writeindex++; // update index only after the value has been written.
} }
RenderJob *GetJob() RenderJob *GetJob()
{ {
return jobList.GetHead(); if (readindex < writeindex) return &pool[readindex++];
return nullptr;
} }
void ReleaseAll() void ReleaseAll()
{ {
poolindex = 0; readindex = 0;
writeindex = 0;
} }
}; };
RenderJobQueue jobQueue; static RenderJobQueue jobQueue; // One static queue is sufficient here. This code will never be called recursively.
void WorkerThread(HWDrawInfo *di) void HWDrawInfo::WorkerThread()
{ {
sector_t fakefront, fakeback, *front, *back; sector_t fakefront, fakeback, *front, *back;
@ -183,7 +102,7 @@ void WorkerThread(HWDrawInfo *di)
auto job = jobQueue.GetJob(); auto job = jobQueue.GetJob();
if (job == nullptr) if (job == nullptr)
{ {
// The queue is empty. But here yielding would be too costly and possibly cause further delays down the line if the thread is halted. // The queue is empty. But yielding would be too costly here and possibly cause further delays down the line if the thread is halted.
// So instead add a few pause instructions and retry immediately. // So instead add a few pause instructions and retry immediately.
_mm_pause(); _mm_pause();
_mm_pause(); _mm_pause();
@ -199,6 +118,7 @@ void WorkerThread(HWDrawInfo *di)
else switch (job->type) else switch (job->type)
{ {
case RenderJob::TerminateJob: case RenderJob::TerminateJob:
PreparePlayerSprites(Viewpoint.sector, in_area);
return; return;
case RenderJob::WallJob: case RenderJob::WallJob:
@ -206,9 +126,9 @@ void WorkerThread(HWDrawInfo *di)
GLWall wall; GLWall wall;
SetupWall.Clock(); SetupWall.Clock();
wall.sub = job->sub; wall.sub = job->sub;
front = hw_FakeFlat(job->sub->render_sector, &fakefront, di->in_area, false); front = hw_FakeFlat(job->sub->render_sector, &fakefront, in_area, false);
back = job->seg->PartnerSeg ? hw_FakeFlat(job->seg->PartnerSeg->Subsector->render_sector, &fakeback, di->in_area, true) : nullptr; back = job->seg->PartnerSeg ? hw_FakeFlat(job->seg->PartnerSeg->Subsector->render_sector, &fakeback, in_area, true) : nullptr;
wall.Process(di, job->seg, front, back); wall.Process(this, job->seg, front, back);
rendered_lines++; rendered_lines++;
SetupWall.Unclock(); SetupWall.Unclock();
break; break;
@ -218,33 +138,33 @@ void WorkerThread(HWDrawInfo *di)
{ {
GLFlat flat; GLFlat flat;
SetupFlat.Clock(); SetupFlat.Clock();
front = hw_FakeFlat(job->sub->render_sector, &fakefront, di->in_area, false); front = hw_FakeFlat(job->sub->render_sector, &fakefront, in_area, false);
flat.ProcessSector(di, front); flat.ProcessSector(this, front);
SetupFlat.Unclock(); SetupFlat.Unclock();
break; break;
} }
case RenderJob::SpriteJob: case RenderJob::SpriteJob:
SetupSprite.Clock(); SetupSprite.Clock();
front = hw_FakeFlat(job->sub->render_sector, &fakefront, di->in_area, false); front = hw_FakeFlat(job->sub->render_sector, &fakefront, in_area, false);
di->RenderThings(job->sub, front); RenderThings(job->sub, front);
SetupSprite.Unclock(); SetupSprite.Unclock();
break; break;
case RenderJob::ParticleJob: case RenderJob::ParticleJob:
{ {
SetupSprite.Clock(); SetupSprite.Clock();
front = hw_FakeFlat(job->sub->render_sector, &fakefront, di->in_area, false); front = hw_FakeFlat(job->sub->render_sector, &fakefront, in_area, false);
for (int i = ParticlesInSubsec[job->sub->Index()]; i != NO_PARTICLE; i = Particles[i].snext) for (int i = ParticlesInSubsec[job->sub->Index()]; i != NO_PARTICLE; i = Particles[i].snext)
{ {
if (di->mClipPortal) if (mClipPortal)
{ {
int clipres = di->mClipPortal->ClipPoint(Particles[i].Pos); int clipres = mClipPortal->ClipPoint(Particles[i].Pos);
if (clipres == PClip_InFront) continue; if (clipres == PClip_InFront) continue;
} }
GLSprite sprite; GLSprite sprite;
sprite.ProcessParticle(di, &Particles[i], front); sprite.ProcessParticle(this, &Particles[i], front);
} }
SetupSprite.Unclock(); SetupSprite.Unclock();
break; break;
@ -788,12 +708,26 @@ void HWDrawInfo::RenderBSPNode (void *node)
void HWDrawInfo::RenderBSP(void *node) void HWDrawInfo::RenderBSP(void *node)
{ {
InitRenderPool(); Bsp.Clock();
// Give the DrawInfo the viewpoint in fixed point because that's what the nodes are.
viewx = FLOAT2FIXED(Viewpoint.Pos.X);
viewy = FLOAT2FIXED(Viewpoint.Pos.Y);
validcount++; // used for processing sidedefs only once by the renderer.
auto future = renderPool.push([&](int id) { auto future = renderPool.push([&](int id) {
WorkerThread(this); WorkerThread();
}); });
RenderBSPNode(node); RenderBSPNode(node);
// Process all the sprites on the current portal's back side which touch the portal.
if (mCurrentPortal != nullptr) mCurrentPortal->RenderAttached(this);
jobQueue.AddJob(RenderJob::TerminateJob, nullptr, nullptr); jobQueue.AddJob(RenderJob::TerminateJob, nullptr, nullptr);
Bsp.Unclock();
future.wait(); future.wait();
jobQueue.ReleaseAll(); jobQueue.ReleaseAll();
} }

View file

@ -449,22 +449,10 @@ void HWDrawInfo::CreateScene()
ProcessAll.Clock(); ProcessAll.Clock();
// clip the scene and fill the drawlists // clip the scene and fill the drawlists
Bsp.Clock();
screen->mVertexData->Map(); screen->mVertexData->Map();
screen->mLights->Map(); screen->mLights->Map();
// Give the DrawInfo the viewpoint in fixed point because that's what the nodes are.
viewx = FLOAT2FIXED(vp.Pos.X);
viewy = FLOAT2FIXED(vp.Pos.Y);
validcount++; // used for processing sidedefs only once by the renderer.
RenderBSP(level.HeadNode()); RenderBSP(level.HeadNode());
PreparePlayerSprites(vp.sector, in_area);
// Process all the sprites on the current portal's back side which touch the portal.
if (mCurrentPortal != nullptr) mCurrentPortal->RenderAttached(this);
Bsp.Unclock();
// And now the crappy hacks that have to be done to avoid rendering anomalies. // And now the crappy hacks that have to be done to avoid rendering anomalies.
// These cannot be multithreaded when the time comes because all these depend // These cannot be multithreaded when the time comes because all these depend

View file

@ -192,6 +192,8 @@ private:
sector_t fakesec; // this is a struct member because it gets used in recursively called functions so it cannot be put on the stack. sector_t fakesec; // this is a struct member because it gets used in recursively called functions so it cannot be put on the stack.
void WorkerThread();
void UnclipSubsector(subsector_t *sub); void UnclipSubsector(subsector_t *sub);
void AddLine(seg_t *seg, bool portalclip); void AddLine(seg_t *seg, bool portalclip);
void PolySubsector(subsector_t * sub); void PolySubsector(subsector_t * sub);