mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-27 22:33:17 +00:00
Further improve early stencil rejection test by restoring stencil blocks to a single value if possible
This commit is contained in:
parent
06362385d6
commit
fc16f6bbbc
4 changed files with 94 additions and 45 deletions
|
@ -285,7 +285,31 @@ void DrawTriangleCodegen::LoopBlockX()
|
|||
}
|
||||
branch_covered.else_block();
|
||||
{
|
||||
LoopPartialBlock();
|
||||
SSAIfBlock branch_covered_stencil;
|
||||
branch_covered_stencil.if_block(StencilIsSingleValue());
|
||||
{
|
||||
SSABool stenciltestpass;
|
||||
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
stenciltestpass = StencilGetSingle() == stencilTestValue;
|
||||
}
|
||||
|
||||
SSAIfBlock branch_stenciltestpass;
|
||||
branch_stenciltestpass.if_block(stenciltestpass);
|
||||
{
|
||||
LoopPartialBlock(true);
|
||||
}
|
||||
branch_stenciltestpass.end_block();
|
||||
}
|
||||
branch_covered_stencil.else_block();
|
||||
{
|
||||
LoopPartialBlock(false);
|
||||
}
|
||||
branch_covered_stencil.end_block();
|
||||
}
|
||||
branch_covered.end_block();
|
||||
|
||||
|
@ -422,10 +446,26 @@ void DrawTriangleCodegen::LoopFullBlock()
|
|||
}
|
||||
}
|
||||
|
||||
void DrawTriangleCodegen::LoopPartialBlock()
|
||||
void DrawTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue)
|
||||
{
|
||||
int pixelsize = truecolor ? 4 : 1;
|
||||
|
||||
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
if (isSingleStencilValue)
|
||||
{
|
||||
SSAInt stencilMask = StencilBlockMask.load(false);
|
||||
SSAUByte val0 = stencilMask.trunc_ubyte();
|
||||
for (int i = 0; i < 8 * 8; i++)
|
||||
StencilBlock[i].store(val0);
|
||||
StencilBlockMask.store(SSAInt(0));
|
||||
}
|
||||
|
||||
SSAUByte lastStencilValue = StencilBlock[0].load(false);
|
||||
stack_stencilblock_restored.store(SSABool(true));
|
||||
stack_stencilblock_lastval.store(lastStencilValue);
|
||||
}
|
||||
|
||||
stack_CY1.store(C1 + DX12 * y0 - DY12 * x0);
|
||||
stack_CY2.store(C2 + DX23 * y0 - DY23 * x0);
|
||||
stack_CY3.store(C3 + DX31 * y0 - DY31 * x0);
|
||||
|
@ -461,6 +501,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
stack_ix.store(SSAInt(0));
|
||||
|
||||
SSAForBlock loopx;
|
||||
SSABool stencilblock_restored;
|
||||
SSAUByte lastStencilValue;
|
||||
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
stencilblock_restored = stack_stencilblock_restored.load();
|
||||
lastStencilValue = stack_stencilblock_lastval.load();
|
||||
}
|
||||
SSAInt ix = stack_ix.load();
|
||||
SSAInt CX1 = stack_CX1.load();
|
||||
SSAInt CX2 = stack_CX2.load();
|
||||
|
@ -472,17 +519,26 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
SSABool visible = (ix + x < clipright) && (iy + y < clipbottom);
|
||||
SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible;
|
||||
|
||||
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||
if (!isSingleStencilValue)
|
||||
{
|
||||
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||
SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false);
|
||||
|
||||
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||
{
|
||||
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||
}
|
||||
else if (variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
covered = covered && stencilValue == stencilTestValue;
|
||||
}
|
||||
}
|
||||
else if (variant == TriDrawVariant::StencilClose)
|
||||
else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||
{
|
||||
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue);
|
||||
}
|
||||
else
|
||||
{
|
||||
covered = covered && StencilGet(ix, iy) == stencilTestValue;
|
||||
covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||
}
|
||||
|
||||
SSAIfBlock branch;
|
||||
|
@ -490,11 +546,11 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
{
|
||||
if (variant == TriDrawVariant::Stencil)
|
||||
{
|
||||
StencilSet(ix, iy, stencilWriteValue);
|
||||
StencilBlock[ix + iy * 8].store(stencilWriteValue);
|
||||
}
|
||||
else if (variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
StencilSet(ix, iy, stencilWriteValue);
|
||||
StencilBlock[ix + iy * 8].store(stencilWriteValue);
|
||||
subsectorbuffer[ix].store(subsectorDepth);
|
||||
}
|
||||
else
|
||||
|
@ -518,6 +574,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
}
|
||||
branch.end_block();
|
||||
|
||||
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false);
|
||||
stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue);
|
||||
stack_stencilblock_lastval.store(newStencilValue);
|
||||
}
|
||||
|
||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]);
|
||||
|
||||
|
@ -539,6 +602,18 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
stack_iy.store(iy + 1);
|
||||
}
|
||||
loopy.end_block();
|
||||
|
||||
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
SSAIfBlock branch;
|
||||
SSABool restored = stack_stencilblock_restored.load();
|
||||
branch.if_block(restored);
|
||||
{
|
||||
SSAUByte lastStencilValue = stack_stencilblock_lastval.load();
|
||||
StencilClear(lastStencilValue);
|
||||
}
|
||||
branch.end_block();
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
@ -891,33 +966,6 @@ void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
|
|||
StencilBlockMask = stencilMasks[block];
|
||||
}
|
||||
|
||||
// Emits code that writes `value` into cell (x, y) of the current 8x8 stencil block.
// The block's mask word is consulted first so that the 64-byte block is only
// touched when the write would actually change something.
// NOTE(review): a mask whose upper 24 bits are all set appears to flag "whole block
// holds one value, stored in the low byte" - confirm against StencilIsSingleValue().
void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
|
||||
{
|
||||
SSAInt mask = StencilBlockMask.load(false);
|
||||
|
||||
SSAIfBlock branchNeedsUpdate;
|
||||
// Skip everything when the mask already equals 0xffffff00 | value.
branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
|
||||
|
||||
SSAIfBlock branchFirstSet;
|
||||
// Upper 24 bits all set: fill the 64 cells with the mask's low byte and zero the mask
// before the per-cell store below.
branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
|
||||
{
|
||||
SSAUByte val0 = mask.trunc_ubyte();
|
||||
for (int i = 0; i < 8 * 8; i++)
|
||||
StencilBlock[i].store(val0);
|
||||
StencilBlockMask.store(SSAInt(0));
|
||||
}
|
||||
branchFirstSet.end_block();
|
||||
|
||||
// Cells are addressed row-major with a stride of 8.
StencilBlock[x + y * 8].store(value);
|
||||
|
||||
branchNeedsUpdate.end_block();
|
||||
}
|
||||
|
||||
// Emits code that yields the stencil value for cell (x, y) of the current 8x8 block.
// When StencilIsSingleValue() holds, the result is the low byte of the block mask;
// otherwise it is the byte stored at row-major index x + y * 8.
// Both operands of the select (including the per-cell load) are built unconditionally.
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
|
||||
{
|
||||
return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
|
||||
}
|
||||
|
||||
SSAUByte DrawTriangleCodegen::StencilGetSingle()
|
||||
{
|
||||
return StencilBlockMask.load(false).trunc_ubyte();
|
||||
|
|
|
@ -44,7 +44,7 @@ private:
|
|||
void LoopBlockY();
|
||||
void LoopBlockX();
|
||||
void LoopFullBlock();
|
||||
void LoopPartialBlock();
|
||||
void LoopPartialBlock(bool isSingleStencilValue);
|
||||
void SetupAffineBlock();
|
||||
|
||||
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
||||
|
@ -59,9 +59,7 @@ private:
|
|||
SSAInt ToPal8(SSAVec4i c);
|
||||
|
||||
void SetStencilBlock(SSAInt block);
|
||||
void StencilSet(SSAInt x, SSAInt y, SSAUByte value);
|
||||
void StencilClear(SSAUByte value);
|
||||
SSAUByte StencilGet(SSAInt x, SSAInt y);
|
||||
SSAUByte StencilGetSingle();
|
||||
SSABool StencilIsSingleValue();
|
||||
|
||||
|
@ -87,6 +85,8 @@ private:
|
|||
SSAStack<SSAFloat> stack_AffineW;
|
||||
SSAStack<SSAFloat> stack_AffineVaryingPosY[TriVertex::NumVarying];
|
||||
SSAStack<SSAInt> stack_AffineVaryingPosX[TriVertex::NumVarying];
|
||||
SSAStack<SSABool> stack_stencilblock_restored;
|
||||
SSAStack<SSAUByte> stack_stencilblock_lastval;
|
||||
|
||||
SSAUBytePtr dest;
|
||||
SSAInt pitch;
|
||||
|
|
|
@ -31,12 +31,13 @@ SSABool::SSABool()
|
|||
: v(0)
|
||||
{
|
||||
}
|
||||
/*
|
||||
|
||||
// Constructs an SSABool wrapping a compile-time constant.
// `constant` is the C++ boolean to turn into an LLVM constant value.
SSABool::SSABool(bool constant)
|
||||
: v(0)
|
||||
{
|
||||
// APInt(1, constant, false): a 1-bit wide, unsigned integer holding the bool.
v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false));
|
||||
}
|
||||
*/
|
||||
|
||||
SSABool::SSABool(llvm::Value *v)
|
||||
: v(v)
|
||||
{
|
||||
|
|
|
@ -35,7 +35,7 @@ class SSABool
|
|||
{
|
||||
public:
|
||||
SSABool();
|
||||
//SSABool(bool constant);
|
||||
explicit SSABool(bool constant);
|
||||
explicit SSABool(llvm::Value *v);
|
||||
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
|
||||
static llvm::Type *llvm_type();
|
||||
|
|
Loading…
Reference in a new issue