Use a simpler algorithm for the stencil buffer as the old one was too slow

This commit is contained in:
Magnus Norddahl 2016-11-12 13:50:28 +01:00
parent eb4021b997
commit e3dc9c93b9
6 changed files with 44 additions and 85 deletions

View File

@ -520,75 +520,41 @@ void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
SSAInt mask = StencilBlockMask.load(false); SSAInt mask = StencilBlockMask.load(false);
SSAIfBlock branchNeedsUpdate; SSAIfBlock branchNeedsUpdate;
branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value)); branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
SSAIfBlock branchFirstSet; SSAIfBlock branchFirstSet;
branchFirstSet.if_block(mask == SSAInt(0xffffffff)); branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
{ {
SSAUByte val0 = StencilBlock[0].load(false); SSAUByte val0 = mask.trunc_ubyte();
for (int i = 1; i < 8 * 8; i++) for (int i = 0; i < 8 * 8; i++)
StencilBlock[i].store(val0); StencilBlock[i].store(val0);
StencilBlockMask.store(SSAInt(0));
} }
branchFirstSet.end_block(); branchFirstSet.end_block();
SSAIfBlock branchNeedsUpdate2;
branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value));
StencilBlock[x + y * 8].store(value); StencilBlock[x + y * 8].store(value);
SSAInt leveloffset = SSAInt(0);
for (int i = 1; i < 4; i++)
{
x = x >> 1;
y = y >> 1;
SSABool differs =
!(StencilBlock[(x << i) + (y << i) * 8].load(false) == value &&
StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value &&
StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value &&
StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value);
SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i));
mask = differs.select(mask & ~levelbit, mask | levelbit);
leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset);
}
SSABool differs =
!(StencilBlock[0].load(false) == value &&
StencilBlock[4].load(false) == value &&
StencilBlock[4 * 8].load(false) == value &&
StencilBlock[4 * 8 + 4].load(false) == value);
mask = differs.select(mask & ~(1 << 22), mask | (1 << 22));
StencilBlockMask.store(mask);
branchNeedsUpdate2.end_block();
branchNeedsUpdate.end_block(); branchNeedsUpdate.end_block();
} }
// Split-diff rendering: where a line holds two statements, the left one is the pre-change text and the right one the post-change text.
// Returns the stencil value for (x, y) of the current block: the block's shared value when the block is
// flagged as single-valued, otherwise the per-pixel entry StencilBlock[x + y * 8].
// Pre-change: single-valued means StencilBlockMask == 0xffffffff and the shared value is read from StencilBlock[0].
// Post-change: the test is delegated to StencilIsSingleValue() and the shared value is StencilBlockMask truncated to a byte.
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y) SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
{ {
SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff); return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false));
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Returns the block's shared stencil value without testing whether the block is single-valued.
// Pre-change: the value is read from StencilBlock[0]. Post-change: it is StencilBlockMask truncated to a byte.
SSAUByte DrawTriangleCodegen::StencilGetSingle() SSAUByte DrawTriangleCodegen::StencilGetSingle()
{ {
return StencilBlock[0].load(false); return StencilBlockMask.load(false).trunc_ubyte();
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Marks the whole block as holding the single stencil value `value`.
// Pre-change: stores `value` in StencilBlock[0] and sets StencilBlockMask to 0xffffffff (two stores).
// Post-change: one store of 0xffffff00 OR-ed with the zero-extended `value`, so the value occupies the
// low 8 bits of the mask and the upper 24 bits are all set.
void DrawTriangleCodegen::StencilClear(SSAUByte value) void DrawTriangleCodegen::StencilClear(SSAUByte value)
{ {
StencilBlock[0].store(value); StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int());
StencilBlockMask.store(SSAInt(0xffffffff));
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Returns whether the block is flagged as holding one stencil value for every pixel.
// Pre-change: true when StencilBlockMask equals 0xffffffff.
// Post-change: true when the upper 24 bits of StencilBlockMask are all set; the low 8 bits are ignored by the test.
SSABool DrawTriangleCodegen::StencilIsSingleValue() SSABool DrawTriangleCodegen::StencilIsSingleValue()
{ {
return StencilBlockMask.load(false) == SSAInt(0xffffffff); return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00);
} }
void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data)

View File

@ -23,6 +23,7 @@
#include "r_compiler/llvm_include.h" #include "r_compiler/llvm_include.h"
#include "ssa_int.h" #include "ssa_int.h"
#include "ssa_float.h" #include "ssa_float.h"
#include "ssa_ubyte.h"
#include "ssa_bool.h" #include "ssa_bool.h"
#include "ssa_scope.h" #include "ssa_scope.h"
@ -81,6 +82,11 @@ SSAInt SSAInt::ashr(int bits)
return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint())); return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint()));
} }
// Emits an integer truncation of this value to the SSAUByte LLVM type and wraps the result as an SSAUByte.
SSAUByte SSAInt::trunc_ubyte()
{
	auto &irBuilder = SSAScope::builder();
	auto *narrowed = irBuilder.CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint());
	return SSAUByte::from_llvm(narrowed);
}
SSAInt operator+(const SSAInt &a, const SSAInt &b) SSAInt operator+(const SSAInt &a, const SSAInt &b)
{ {
return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View File

@ -26,6 +26,7 @@ namespace llvm { class Value; }
namespace llvm { class Type; } namespace llvm { class Type; }
class SSAFloat; class SSAFloat;
class SSAUByte;
class SSAInt class SSAInt
{ {
@ -44,6 +45,8 @@ public:
SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap);
SSAInt ashr(int bits); SSAInt ashr(int bits);
SSAUByte trunc_ubyte();
llvm::Value *v; llvm::Value *v;
}; };

View File

@ -41,6 +41,9 @@ CVAR(Bool, r_debug_cull, 0, 0)
void RenderPolyBsp::Render() void RenderPolyBsp::Render()
{ {
if (!r_swtruecolor) // Disable pal rendering for now
return;
// Setup working buffers // Setup working buffers
PolyVertexBuffer::Clear(); PolyVertexBuffer::Clear();
SolidSegments.clear(); SolidSegments.clear();

View File

@ -55,14 +55,25 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian
auto llvm = LLVMDrawers::Instance(); auto llvm = LLVMDrawers::Instance();
void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *);
#if 1
switch (variant) switch (variant)
{ {
default: default:
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break;
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break;
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break;
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break; case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break;
} }
#else
switch (variant)
{
default:
case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break;
case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break;
case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break;
case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break;
}
#endif
TriDrawTriangleArgs args; TriDrawTriangleArgs args;
args.dest = dc_destorg; args.dest = dc_destorg;

View File

@ -85,70 +85,40 @@ public:
// Split-diff rendering: where a line holds two statements, the left one is the pre-change text and the
// right one the post-change text; a line with a single statement exists on one side of the diff only.
// Writes `value` at (x, y) of the block, indexing Values as x + y * 8.
// Pre-change: the block counts as single-valued when ValueMask == 0xffffffff, with the shared value in Values[0];
//   after the write, the loop and the final test re-derive individual ValueMask bits by comparing Values entries.
// Post-change: the block counts as single-valued when the upper 24 bits of ValueMask are all set, with the shared
//   value in the low 8 bits; a write of a different value first copies the shared value into all 64 entries.
void Set(int x, int y, uint8_t value) void Set(int x, int y, uint8_t value)
{ {
if (ValueMask == 0xffffffff) if ((ValueMask & 0xffffff00) == 0xffffff00)
{ {
// Writing the value the block already holds everywhere is a no-op.
if (Values[0] == value) if ((ValueMask & 0xff) == value)
return; return;
// Expand the shared value into the per-pixel array (pre-change skips index 0, which already holds it).
for (int i = 1; i < 8 * 8; i++) for (int i = 0; i < 8 * 8; i++)
Values[i] = Values[0]; Values[i] = (ValueMask & 0xff);
// NOTE(review): one-sided line, presumably added by this change so the block no longer tests as single-valued; confirm against the repository.
ValueMask = 0;
} }
// NOTE(review): the early return below is one-sided, presumably pre-change only; confirm against the repository.
if (Values[x + y * 8] == value)
return;
Values[x + y * 8] = value; Values[x + y * 8] = value;
// NOTE(review): every line from here to the closing brace is one-sided, presumably the removed
// per-level ValueMask bookkeeping; confirm against the repository.
int leveloffset = 0;
for (int i = 1; i < 4; i++)
{
x >>= 1;
y >>= 1;
bool differs =
Values[(x << i) + (y << i) * 8] != value ||
Values[((x + 1) << i) + (y << i) * 8] != value ||
Values[(x << i) + ((y + 1) << i) * 8] != value ||
Values[((x + 1) << i) + ((y + 1) << i) * 8] != value;
int levelbit = 1 << (leveloffset + x + y * (8 >> i));
if (differs)
ValueMask = ValueMask & ~levelbit;
else
ValueMask = ValueMask | levelbit;
leveloffset += (8 >> leveloffset) * (8 >> leveloffset);
}
if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value)
ValueMask = ValueMask & ~(1 << 22);
else
ValueMask = ValueMask | (1 << 22);
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Returns the stencil value at (x, y): the shared value when the block is single-valued, otherwise Values[x + y * 8].
// Pre-change: single-valued means ValueMask == 0xffffffff and the shared value is Values[0].
// Post-change: the test is IsSingleValue() and the shared value is the low 8 bits of ValueMask.
uint8_t Get(int x, int y) const uint8_t Get(int x, int y) const
{ {
if (ValueMask == 0xffffffff) if (IsSingleValue())
return Values[0]; return ValueMask & 0xff;
else else
return Values[x + y * 8]; return Values[x + y * 8];
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Marks the whole block as holding the single stencil value `value`.
// Pre-change: stores `value` in Values[0] and sets ValueMask to 0xffffffff.
// Post-change: a single assignment sets the upper 24 bits of ValueMask and puts `value` in its low 8 bits;
// Values is not written.
void Clear(uint8_t value) void Clear(uint8_t value)
{ {
Values[0] = value; ValueMask = 0xffffff00 | (uint32_t)value;
ValueMask = 0xffffffff;
} }
// Split-diff rendering: the left statement is the pre-change text, the right statement the post-change text.
// Reports whether the block is flagged as holding one stencil value for every pixel.
// Pre-change: true when ValueMask equals 0xffffffff.
// Post-change: true when the upper 24 bits of ValueMask are all set; the low 8 bits do not affect the result.
bool IsSingleValue() const bool IsSingleValue() const
{ {
return ValueMask == 0xffffffff; return (ValueMask & 0xffffff00) == 0xffffff00;
} }
private: private:
uint8_t *Values; // [8 * 8]; uint8_t *Values;
uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating is Values are the same uint32_t &ValueMask;
}; };
class PolySubsectorGBuffer class PolySubsectorGBuffer