mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-24 04:51:41 +00:00
Use a simpler algorithm for the stencil buffer as the old one was too slow
This commit is contained in:
parent
eb4021b997
commit
e3dc9c93b9
6 changed files with 44 additions and 85 deletions
|
@ -520,75 +520,41 @@ void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
|
||||||
SSAInt mask = StencilBlockMask.load(false);
|
SSAInt mask = StencilBlockMask.load(false);
|
||||||
|
|
||||||
SSAIfBlock branchNeedsUpdate;
|
SSAIfBlock branchNeedsUpdate;
|
||||||
branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value));
|
branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
|
||||||
|
|
||||||
SSAIfBlock branchFirstSet;
|
SSAIfBlock branchFirstSet;
|
||||||
branchFirstSet.if_block(mask == SSAInt(0xffffffff));
|
branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
|
||||||
{
|
{
|
||||||
SSAUByte val0 = StencilBlock[0].load(false);
|
SSAUByte val0 = mask.trunc_ubyte();
|
||||||
for (int i = 1; i < 8 * 8; i++)
|
for (int i = 0; i < 8 * 8; i++)
|
||||||
StencilBlock[i].store(val0);
|
StencilBlock[i].store(val0);
|
||||||
|
StencilBlockMask.store(SSAInt(0));
|
||||||
}
|
}
|
||||||
branchFirstSet.end_block();
|
branchFirstSet.end_block();
|
||||||
|
|
||||||
SSAIfBlock branchNeedsUpdate2;
|
|
||||||
branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value));
|
|
||||||
|
|
||||||
StencilBlock[x + y * 8].store(value);
|
StencilBlock[x + y * 8].store(value);
|
||||||
|
|
||||||
SSAInt leveloffset = SSAInt(0);
|
|
||||||
for (int i = 1; i < 4; i++)
|
|
||||||
{
|
|
||||||
x = x >> 1;
|
|
||||||
y = y >> 1;
|
|
||||||
|
|
||||||
SSABool differs =
|
|
||||||
!(StencilBlock[(x << i) + (y << i) * 8].load(false) == value &&
|
|
||||||
StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value &&
|
|
||||||
StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value &&
|
|
||||||
StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value);
|
|
||||||
|
|
||||||
SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i));
|
|
||||||
|
|
||||||
mask = differs.select(mask & ~levelbit, mask | levelbit);
|
|
||||||
|
|
||||||
leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
SSABool differs =
|
|
||||||
!(StencilBlock[0].load(false) == value &&
|
|
||||||
StencilBlock[4].load(false) == value &&
|
|
||||||
StencilBlock[4 * 8].load(false) == value &&
|
|
||||||
StencilBlock[4 * 8 + 4].load(false) == value);
|
|
||||||
|
|
||||||
mask = differs.select(mask & ~(1 << 22), mask | (1 << 22));
|
|
||||||
|
|
||||||
StencilBlockMask.store(mask);
|
|
||||||
|
|
||||||
branchNeedsUpdate2.end_block();
|
|
||||||
branchNeedsUpdate.end_block();
|
branchNeedsUpdate.end_block();
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
|
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
|
||||||
{
|
{
|
||||||
SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff);
|
return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
|
||||||
return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAUByte DrawTriangleCodegen::StencilGetSingle()
|
SSAUByte DrawTriangleCodegen::StencilGetSingle()
|
||||||
{
|
{
|
||||||
return StencilBlock[0].load(false);
|
return StencilBlockMask.load(false).trunc_ubyte();
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::StencilClear(SSAUByte value)
|
void DrawTriangleCodegen::StencilClear(SSAUByte value)
|
||||||
{
|
{
|
||||||
StencilBlock[0].store(value);
|
StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int());
|
||||||
StencilBlockMask.store(SSAInt(0xffffffff));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
SSABool DrawTriangleCodegen::StencilIsSingleValue()
|
SSABool DrawTriangleCodegen::StencilIsSingleValue()
|
||||||
{
|
{
|
||||||
return StencilBlockMask.load(false) == SSAInt(0xffffffff);
|
return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data)
|
void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data)
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "r_compiler/llvm_include.h"
|
#include "r_compiler/llvm_include.h"
|
||||||
#include "ssa_int.h"
|
#include "ssa_int.h"
|
||||||
#include "ssa_float.h"
|
#include "ssa_float.h"
|
||||||
|
#include "ssa_ubyte.h"
|
||||||
#include "ssa_bool.h"
|
#include "ssa_bool.h"
|
||||||
#include "ssa_scope.h"
|
#include "ssa_scope.h"
|
||||||
|
|
||||||
|
@ -81,6 +82,11 @@ SSAInt SSAInt::ashr(int bits)
|
||||||
return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint()));
|
return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SSAUByte SSAInt::trunc_ubyte()
|
||||||
|
{
|
||||||
|
return SSAUByte::from_llvm(SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint()));
|
||||||
|
}
|
||||||
|
|
||||||
SSAInt operator+(const SSAInt &a, const SSAInt &b)
|
SSAInt operator+(const SSAInt &a, const SSAInt &b)
|
||||||
{
|
{
|
||||||
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
||||||
|
|
|
@ -26,6 +26,7 @@ namespace llvm { class Value; }
|
||||||
namespace llvm { class Type; }
|
namespace llvm { class Type; }
|
||||||
|
|
||||||
class SSAFloat;
|
class SSAFloat;
|
||||||
|
class SSAUByte;
|
||||||
|
|
||||||
class SSAInt
|
class SSAInt
|
||||||
{
|
{
|
||||||
|
@ -44,6 +45,8 @@ public:
|
||||||
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
|
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
|
||||||
SSAInt ashr(int bits);
|
SSAInt ashr(int bits);
|
||||||
|
|
||||||
|
SSAUByte trunc_ubyte();
|
||||||
|
|
||||||
llvm::Value *v;
|
llvm::Value *v;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -41,6 +41,9 @@ CVAR(Bool, r_debug_cull, 0, 0)
|
||||||
|
|
||||||
void RenderPolyBsp::Render()
|
void RenderPolyBsp::Render()
|
||||||
{
|
{
|
||||||
|
if (!r_swtruecolor) // Disable pal rendering for now
|
||||||
|
return;
|
||||||
|
|
||||||
// Setup working buffers
|
// Setup working buffers
|
||||||
PolyVertexBuffer::Clear();
|
PolyVertexBuffer::Clear();
|
||||||
SolidSegments.clear();
|
SolidSegments.clear();
|
||||||
|
|
|
@ -55,14 +55,25 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian
|
||||||
|
|
||||||
auto llvm = LLVMDrawers::Instance();
|
auto llvm = LLVMDrawers::Instance();
|
||||||
void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *);
|
void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *);
|
||||||
|
#if 1
|
||||||
switch (variant)
|
switch (variant)
|
||||||
{
|
{
|
||||||
default:
|
default:
|
||||||
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break;
|
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break;
|
||||||
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break;
|
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break;
|
||||||
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break;
|
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break;
|
||||||
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break;
|
case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break;
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
switch (variant)
|
||||||
|
{
|
||||||
|
default:
|
||||||
|
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break;
|
||||||
|
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break;
|
||||||
|
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break;
|
||||||
|
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
TriDrawTriangleArgs args;
|
TriDrawTriangleArgs args;
|
||||||
args.dest = dc_destorg;
|
args.dest = dc_destorg;
|
||||||
|
|
|
@ -85,70 +85,40 @@ public:
|
||||||
|
|
||||||
void Set(int x, int y, uint8_t value)
|
void Set(int x, int y, uint8_t value)
|
||||||
{
|
{
|
||||||
if (ValueMask == 0xffffffff)
|
if ((ValueMask & 0xffffff00) == 0xffffff00)
|
||||||
{
|
{
|
||||||
if (Values[0] == value)
|
if ((ValueMask & 0xff) == value)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
for (int i = 1; i < 8 * 8; i++)
|
for (int i = 0; i < 8 * 8; i++)
|
||||||
Values[i] = Values[0];
|
Values[i] = (ValueMask & 0xff);
|
||||||
|
ValueMask = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Values[x + y * 8] == value)
|
|
||||||
return;
|
|
||||||
|
|
||||||
Values[x + y * 8] = value;
|
Values[x + y * 8] = value;
|
||||||
|
|
||||||
int leveloffset = 0;
|
|
||||||
for (int i = 1; i < 4; i++)
|
|
||||||
{
|
|
||||||
x >>= 1;
|
|
||||||
y >>= 1;
|
|
||||||
|
|
||||||
bool differs =
|
|
||||||
Values[(x << i) + (y << i) * 8] != value ||
|
|
||||||
Values[((x + 1) << i) + (y << i) * 8] != value ||
|
|
||||||
Values[(x << i) + ((y + 1) << i) * 8] != value ||
|
|
||||||
Values[((x + 1) << i) + ((y + 1) << i) * 8] != value;
|
|
||||||
|
|
||||||
int levelbit = 1 << (leveloffset + x + y * (8 >> i));
|
|
||||||
|
|
||||||
if (differs)
|
|
||||||
ValueMask = ValueMask & ~levelbit;
|
|
||||||
else
|
|
||||||
ValueMask = ValueMask | levelbit;
|
|
||||||
|
|
||||||
leveloffset += (8 >> leveloffset) * (8 >> leveloffset);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value)
|
|
||||||
ValueMask = ValueMask & ~(1 << 22);
|
|
||||||
else
|
|
||||||
ValueMask = ValueMask | (1 << 22);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint8_t Get(int x, int y) const
|
uint8_t Get(int x, int y) const
|
||||||
{
|
{
|
||||||
if (ValueMask == 0xffffffff)
|
if (IsSingleValue())
|
||||||
return Values[0];
|
return ValueMask & 0xff;
|
||||||
else
|
else
|
||||||
return Values[x + y * 8];
|
return Values[x + y * 8];
|
||||||
}
|
}
|
||||||
|
|
||||||
void Clear(uint8_t value)
|
void Clear(uint8_t value)
|
||||||
{
|
{
|
||||||
Values[0] = value;
|
ValueMask = 0xffffff00 | (uint32_t)value;
|
||||||
ValueMask = 0xffffffff;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool IsSingleValue() const
|
bool IsSingleValue() const
|
||||||
{
|
{
|
||||||
return ValueMask == 0xffffffff;
|
return (ValueMask & 0xffffff00) == 0xffffff00;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint8_t *Values; // [8 * 8];
|
uint8_t *Values;
|
||||||
uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating if Values are the same
|
uint32_t &ValueMask;
|
||||||
};
|
};
|
||||||
|
|
||||||
class PolySubsectorGBuffer
|
class PolySubsectorGBuffer
|
||||||
|
|
Loading…
Reference in a new issue