Further improve early stencil rejection test by restoring stencil blocks to a single value if possible

This commit is contained in:
Magnus Norddahl 2016-12-01 13:31:42 +01:00
parent 06362385d6
commit fc16f6bbbc
4 changed files with 94 additions and 45 deletions

View file

@ -285,7 +285,31 @@ void DrawTriangleCodegen::LoopBlockX()
} }
branch_covered.else_block(); branch_covered.else_block();
{ {
LoopPartialBlock(); SSAIfBlock branch_covered_stencil;
branch_covered_stencil.if_block(StencilIsSingleValue());
{
SSABool stenciltestpass;
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose)
{
stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue);
}
else
{
stenciltestpass = StencilGetSingle() == stencilTestValue;
}
SSAIfBlock branch_stenciltestpass;
branch_stenciltestpass.if_block(stenciltestpass);
{
LoopPartialBlock(true);
}
branch_stenciltestpass.end_block();
}
branch_covered_stencil.else_block();
{
LoopPartialBlock(false);
}
branch_covered_stencil.end_block();
} }
branch_covered.end_block(); branch_covered.end_block();
@ -422,10 +446,26 @@ void DrawTriangleCodegen::LoopFullBlock()
} }
} }
void DrawTriangleCodegen::LoopPartialBlock() void DrawTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue)
{ {
int pixelsize = truecolor ? 4 : 1; int pixelsize = truecolor ? 4 : 1;
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
if (isSingleStencilValue)
{
SSAInt stencilMask = StencilBlockMask.load(false);
SSAUByte val0 = stencilMask.trunc_ubyte();
for (int i = 0; i < 8 * 8; i++)
StencilBlock[i].store(val0);
StencilBlockMask.store(SSAInt(0));
}
SSAUByte lastStencilValue = StencilBlock[0].load(false);
stack_stencilblock_restored.store(SSABool(true));
stack_stencilblock_lastval.store(lastStencilValue);
}
stack_CY1.store(C1 + DX12 * y0 - DY12 * x0); stack_CY1.store(C1 + DX12 * y0 - DY12 * x0);
stack_CY2.store(C2 + DX23 * y0 - DY23 * x0); stack_CY2.store(C2 + DX23 * y0 - DY23 * x0);
stack_CY3.store(C3 + DX31 * y0 - DY31 * x0); stack_CY3.store(C3 + DX31 * y0 - DY31 * x0);
@ -461,6 +501,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
stack_ix.store(SSAInt(0)); stack_ix.store(SSAInt(0));
SSAForBlock loopx; SSAForBlock loopx;
SSABool stencilblock_restored;
SSAUByte lastStencilValue;
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
stencilblock_restored = stack_stencilblock_restored.load();
lastStencilValue = stack_stencilblock_lastval.load();
}
SSAInt ix = stack_ix.load(); SSAInt ix = stack_ix.load();
SSAInt CX1 = stack_CX1.load(); SSAInt CX1 = stack_CX1.load();
SSAInt CX2 = stack_CX2.load(); SSAInt CX2 = stack_CX2.load();
@ -472,17 +519,26 @@ void DrawTriangleCodegen::LoopPartialBlock()
SSABool visible = (ix + x < clipright) && (iy + y < clipbottom); SSABool visible = (ix + x < clipright) && (iy + y < clipbottom);
SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible; SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible;
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector) if (!isSingleStencilValue)
{ {
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth; SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false);
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
}
else if (variant == TriDrawVariant::StencilClose)
{
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue);
}
else
{
covered = covered && stencilValue == stencilTestValue;
}
} }
else if (variant == TriDrawVariant::StencilClose) else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
{ {
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue); covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth;
}
else
{
covered = covered && StencilGet(ix, iy) == stencilTestValue;
} }
SSAIfBlock branch; SSAIfBlock branch;
@ -490,11 +546,11 @@ void DrawTriangleCodegen::LoopPartialBlock()
{ {
if (variant == TriDrawVariant::Stencil) if (variant == TriDrawVariant::Stencil)
{ {
StencilSet(ix, iy, stencilWriteValue); StencilBlock[ix + iy * 8].store(stencilWriteValue);
} }
else if (variant == TriDrawVariant::StencilClose) else if (variant == TriDrawVariant::StencilClose)
{ {
StencilSet(ix, iy, stencilWriteValue); StencilBlock[ix + iy * 8].store(stencilWriteValue);
subsectorbuffer[ix].store(subsectorDepth); subsectorbuffer[ix].store(subsectorDepth);
} }
else else
@ -518,6 +574,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
} }
branch.end_block(); branch.end_block();
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false);
stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue);
stack_stencilblock_lastval.store(newStencilValue);
}
for (int i = 0; i < TriVertex::NumVarying; i++) for (int i = 0; i < TriVertex::NumVarying; i++)
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]); stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]);
@ -539,6 +602,18 @@ void DrawTriangleCodegen::LoopPartialBlock()
stack_iy.store(iy + 1); stack_iy.store(iy + 1);
} }
loopy.end_block(); loopy.end_block();
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
{
SSAIfBlock branch;
SSABool restored = stack_stencilblock_restored.load();
branch.if_block(restored);
{
SSAUByte lastStencilValue = stack_stencilblock_lastval.load();
StencilClear(lastStencilValue);
}
branch.end_block();
}
} }
#if 0 #if 0
@ -891,33 +966,6 @@ void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
StencilBlockMask = stencilMasks[block]; StencilBlockMask = stencilMasks[block];
} }
// Stores `value` into entry (x, y) of the current stencil block, where the
// entry index is computed as x + y * 8.
//
// The block mask is consulted first:
//  - If the mask already equals 0xffffff00 | value, nothing is written.
//  - Otherwise, if the upper 24 bits of the mask are all set, the low byte of
//    the mask is first written to all 8 * 8 entries and the mask is reset to 0.
//    NOTE(review): presumably "upper 24 bits set" is the single-value encoding
//    that StencilIsSingleValue() tests for - confirm against its definition.
//  - Finally `value` is stored at the addressed entry.
void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
{
SSAInt mask = StencilBlockMask.load(false);
SSAIfBlock branchNeedsUpdate;
// Skip all work when the mask already reads 0xffffff00 | value.
branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
SSAIfBlock branchFirstSet;
// Upper 24 bits all set: expand the low byte into every entry before the write.
branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
{
SSAUByte val0 = mask.trunc_ubyte();
for (int i = 0; i < 8 * 8; i++)
StencilBlock[i].store(val0);
// Mask is cleared to 0 once the entries hold the expanded value.
StencilBlockMask.store(SSAInt(0));
}
branchFirstSet.end_block();
StencilBlock[x + y * 8].store(value);
branchNeedsUpdate.end_block();
}
// Returns the stencil value for entry (x, y) of the current stencil block.
// When StencilIsSingleValue() is true the result is the low byte of the block
// mask; otherwise it is the entry stored at index x + y * 8.
// Both operands are passed to select(), so the per-entry load is part of the
// expression regardless of which value is chosen.
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
{
return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
}
SSAUByte DrawTriangleCodegen::StencilGetSingle() SSAUByte DrawTriangleCodegen::StencilGetSingle()
{ {
return StencilBlockMask.load(false).trunc_ubyte(); return StencilBlockMask.load(false).trunc_ubyte();

View file

@ -44,7 +44,7 @@ private:
void LoopBlockY(); void LoopBlockY();
void LoopBlockX(); void LoopBlockX();
void LoopFullBlock(); void LoopFullBlock();
void LoopPartialBlock(); void LoopPartialBlock(bool isSingleStencilValue);
void SetupAffineBlock(); void SetupAffineBlock();
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying); SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
@ -59,9 +59,7 @@ private:
SSAInt ToPal8(SSAVec4i c); SSAInt ToPal8(SSAVec4i c);
void SetStencilBlock(SSAInt block); void SetStencilBlock(SSAInt block);
void StencilSet(SSAInt x, SSAInt y, SSAUByte value);
void StencilClear(SSAUByte value); void StencilClear(SSAUByte value);
SSAUByte StencilGet(SSAInt x, SSAInt y);
SSAUByte StencilGetSingle(); SSAUByte StencilGetSingle();
SSABool StencilIsSingleValue(); SSABool StencilIsSingleValue();
@ -87,6 +85,8 @@ private:
SSAStack<SSAFloat> stack_AffineW; SSAStack<SSAFloat> stack_AffineW;
SSAStack<SSAFloat> stack_AffineVaryingPosY[TriVertex::NumVarying]; SSAStack<SSAFloat> stack_AffineVaryingPosY[TriVertex::NumVarying];
SSAStack<SSAInt> stack_AffineVaryingPosX[TriVertex::NumVarying]; SSAStack<SSAInt> stack_AffineVaryingPosX[TriVertex::NumVarying];
SSAStack<SSABool> stack_stencilblock_restored;
SSAStack<SSAUByte> stack_stencilblock_lastval;
SSAUBytePtr dest; SSAUBytePtr dest;
SSAInt pitch; SSAInt pitch;

View file

@ -31,12 +31,13 @@ SSABool::SSABool()
: v(0) : v(0)
{ {
} }
/*
SSABool::SSABool(bool constant) SSABool::SSABool(bool constant)
: v(0) : v(0)
{ {
v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false));
} }
*/
SSABool::SSABool(llvm::Value *v) SSABool::SSABool(llvm::Value *v)
: v(v) : v(v)
{ {

View file

@ -35,7 +35,7 @@ class SSABool
{ {
public: public:
SSABool(); SSABool();
//SSABool(bool constant); explicit SSABool(bool constant);
explicit SSABool(llvm::Value *v); explicit SSABool(llvm::Value *v);
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); } static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
static llvm::Type *llvm_type(); static llvm::Type *llvm_type();