Use a simpler algorithm for the stencil buffer as the old one was too slow

This commit is contained in:
Magnus Norddahl 2016-11-12 13:50:28 +01:00
parent eb4021b997
commit e3dc9c93b9
6 changed files with 44 additions and 85 deletions

View file

@ -520,75 +520,41 @@ void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
SSAInt mask = StencilBlockMask.load(false);
SSAIfBlock branchNeedsUpdate;
branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value));
branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
SSAIfBlock branchFirstSet;
branchFirstSet.if_block(mask == SSAInt(0xffffffff));
branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
{
SSAUByte val0 = StencilBlock[0].load(false);
for (int i = 1; i < 8 * 8; i++)
SSAUByte val0 = mask.trunc_ubyte();
for (int i = 0; i < 8 * 8; i++)
StencilBlock[i].store(val0);
StencilBlockMask.store(SSAInt(0));
}
branchFirstSet.end_block();
SSAIfBlock branchNeedsUpdate2;
branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value));
StencilBlock[x + y * 8].store(value);
SSAInt leveloffset = SSAInt(0);
for (int i = 1; i < 4; i++)
{
x = x >> 1;
y = y >> 1;
SSABool differs =
!(StencilBlock[(x << i) + (y << i) * 8].load(false) == value &&
StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value &&
StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value &&
StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value);
SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i));
mask = differs.select(mask & ~levelbit, mask | levelbit);
leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset);
}
SSABool differs =
!(StencilBlock[0].load(false) == value &&
StencilBlock[4].load(false) == value &&
StencilBlock[4 * 8].load(false) == value &&
StencilBlock[4 * 8 + 4].load(false) == value);
mask = differs.select(mask & ~(1 << 22), mask | (1 << 22));
StencilBlockMask.store(mask);
branchNeedsUpdate2.end_block();
branchNeedsUpdate.end_block();
}
// Emits code that yields the value stored for cell (x, y) of the current
// block; individual cells are addressed as StencilBlock[x + y * 8].
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped. The first two statements treat StencilBlockMask == 0xffffffff as
// the "single value" marker and read the value from StencilBlock[0]; the last
// statement instead takes the value from the low byte of StencilBlockMask via
// trunc_ubyte(). Presumably only one variant exists in the real file — confirm
// against the repository before relying on either.
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
{
SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff);
return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false));
return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
}
// Emits code that yields the block's value without indexing by (x, y).
// NOTE(review): two return statements are present, which looks like the old
// and new versions of a diff merged together: the first reads StencilBlock[0],
// the second reads the low byte of StencilBlockMask. Presumably callers only
// use this when the block holds a single value — TODO confirm.
SSAUByte DrawTriangleCodegen::StencilGetSingle()
{
return StencilBlock[0].load(false);
return StencilBlockMask.load(false).trunc_ubyte();
}
// Emits code that resets the block so that it reports `value` as its single
// value.
// NOTE(review): this span appears to contain both sides of a diff. The first
// two stores write the value to StencilBlock[0] and set the mask to
// 0xffffffff; the final store packs the value into the low 8 bits of the mask
// underneath 0xffffff00. Confirm which variant the real file contains.
void DrawTriangleCodegen::StencilClear(SSAUByte value)
{
StencilBlock[0].store(value);
StencilBlockMask.store(SSAInt(0xffffffff));
// Upper 24 bits all set, low byte carries the value zero-extended to int.
StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int());
}
// Emits a boolean that is true when StencilBlockMask carries the
// "single value" marker.
// NOTE(review): two return statements are present (likely old and new diff
// lines merged). The first requires the whole mask to equal 0xffffffff; the
// second only requires the upper 24 bits to be set and ignores the low byte.
SSABool DrawTriangleCodegen::StencilIsSingleValue()
{
return StencilBlockMask.load(false) == SSAInt(0xffffffff);
return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00);
}
void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data)

View file

@ -23,6 +23,7 @@
#include "r_compiler/llvm_include.h"
#include "ssa_int.h"
#include "ssa_float.h"
#include "ssa_ubyte.h"
#include "ssa_bool.h"
#include "ssa_scope.h"
@ -81,6 +82,11 @@ SSAInt SSAInt::ashr(int bits)
return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint()));
}
// Narrows this integer to SSAUByte's LLVM type by emitting a trunc
// instruction through the current scope's IR builder, and wraps the
// resulting LLVM value as an SSAUByte.
SSAUByte SSAInt::trunc_ubyte()
{
auto truncated = SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint());
return SSAUByte::from_llvm(truncated);
}
SSAInt operator+(const SSAInt &a, const SSAInt &b)
{
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View file

@ -26,6 +26,7 @@ namespace llvm { class Value; }
namespace llvm { class Type; }
class SSAFloat;
class SSAUByte;
class SSAInt
{
@ -44,6 +45,8 @@ public:
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
SSAInt ashr(int bits);
SSAUByte trunc_ubyte();
llvm::Value *v;
};

View file

@ -41,6 +41,9 @@ CVAR(Bool, r_debug_cull, 0, 0)
void RenderPolyBsp::Render()
{
if (!r_swtruecolor) // Disable pal rendering for now
return;
// Setup working buffers
PolyVertexBuffer::Clear();
SolidSegments.clear();

View file

@ -55,14 +55,25 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian
auto llvm = LLVMDrawers::Instance();
void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *);
#if 1
switch (variant)
{
default:
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break;
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break;
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break;
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break;
case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break;
}
#else
switch (variant)
{
default:
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break;
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break;
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break;
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break;
}
#endif
TriDrawTriangleArgs args;
args.dest = dc_destorg;

View file

@ -85,70 +85,40 @@ public:
// Writes `value` into cell (x, y) of the block; cells are stored at
// Values[x + y * 8].
// NOTE(review): this span looks like a diff rendering with the +/- markers
// stripped, so statements from two different implementations are interleaved:
//  - one variant marks a single-value block with ValueMask == 0xffffffff,
//    keeps the value in Values[0], and afterwards maintains per-level
//    "all equal" bits in ValueMask (the loop and the bit-22 update below);
//  - the other marks it with the upper 24 bits of ValueMask set and keeps the
//    value in the low 8 bits.
// Read literally the statements below do not form a coherent function;
// confirm the intended body against the repository.
void Set(int x, int y, uint8_t value)
{
if (ValueMask == 0xffffffff)
if ((ValueMask & 0xffffff00) == 0xffffff00)
{
// Nothing to do when the block already holds exactly this value.
if (Values[0] == value)
if ((ValueMask & 0xff) == value)
return;
// Otherwise spread the single value over the cells before diverging.
for (int i = 1; i < 8 * 8; i++)
Values[i] = Values[0];
for (int i = 0; i < 8 * 8; i++)
Values[i] = (ValueMask & 0xff);
// Drop the single-value marker.
ValueMask = 0;
}
if (Values[x + y * 8] == value)
return;
Values[x + y * 8] = value;
int leveloffset = 0;
for (int i = 1; i < 4; i++)
{
x >>= 1;
y >>= 1;
// Compares four cells spaced (1 << i) apart against the new value.
bool differs =
Values[(x << i) + (y << i) * 8] != value ||
Values[((x + 1) << i) + (y << i) * 8] != value ||
Values[(x << i) + ((y + 1) << i) * 8] != value ||
Values[((x + 1) << i) + ((y + 1) << i) * 8] != value;
int levelbit = 1 << (leveloffset + x + y * (8 >> i));
if (differs)
ValueMask = ValueMask & ~levelbit;
else
ValueMask = ValueMask | levelbit;
leveloffset += (8 >> leveloffset) * (8 >> leveloffset);
}
// Bit 22 is set only when the four probed cells (0, 4, 4*8, 4*8+4) all equal value.
if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value)
ValueMask = ValueMask & ~(1 << 22);
else
ValueMask = ValueMask | (1 << 22);
}
// Returns the value for cell (x, y); per-cell storage is Values[x + y * 8].
// NOTE(review): the first if/return pair (ValueMask == 0xffffffff, value in
// Values[0]) and the following IsSingleValue() branch (value in the low byte
// of ValueMask) look like the old and new sides of a diff merged together —
// confirm which one the real file contains.
uint8_t Get(int x, int y) const
{
if (ValueMask == 0xffffffff)
return Values[0];
if (IsSingleValue())
return ValueMask & 0xff;
else
return Values[x + y * 8];
}
// Resets the block so that it reports `value` as its single value.
// NOTE(review): both sides of a diff appear to be present. The first two
// statements store the value in Values[0] and set ValueMask to 0xffffffff;
// the last statement overwrites ValueMask with the value packed into its low
// 8 bits beneath 0xffffff00.
void Clear(uint8_t value)
{
Values[0] = value;
ValueMask = 0xffffffff;
ValueMask = 0xffffff00 | (uint32_t)value;
}
// Reports whether ValueMask carries the "single value" marker.
// NOTE(review): two return statements are present (likely old and new diff
// lines merged). The first requires ValueMask to be exactly 0xffffffff; the
// second only tests the upper 24 bits and ignores the low byte.
bool IsSingleValue() const
{
return ValueMask == 0xffffffff;
return (ValueMask & 0xffffff00) == 0xffffff00;
}
private:
// NOTE(review): each member is declared twice below, which looks like the old
// (commented) and new (uncommented) sides of a diff merged together.
uint8_t *Values; // [8 * 8];
uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating if Values are the same
// Per-cell storage, indexed as x + y * 8 by the accessors in this class.
uint8_t *Values;
// In the packed form used by the accessors in this class, the upper 24 bits
// being all set marks a single-value block and the low 8 bits hold that value.
uint32_t &ValueMask;
};
class PolySubsectorGBuffer