mirror of
https://github.com/ZDoom/qzdoom-gpl.git
synced 2024-11-29 23:23:07 +00:00
Further improve early stencil rejection test by restoring stencil blocks to a single value if possible
This commit is contained in:
parent
06362385d6
commit
fc16f6bbbc
4 changed files with 94 additions and 45 deletions
|
@ -285,7 +285,31 @@ void DrawTriangleCodegen::LoopBlockX()
|
||||||
}
|
}
|
||||||
branch_covered.else_block();
|
branch_covered.else_block();
|
||||||
{
|
{
|
||||||
LoopPartialBlock();
|
SSAIfBlock branch_covered_stencil;
|
||||||
|
branch_covered_stencil.if_block(StencilIsSingleValue());
|
||||||
|
{
|
||||||
|
SSABool stenciltestpass;
|
||||||
|
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector || variant == TriDrawVariant::StencilClose)
|
||||||
|
{
|
||||||
|
stenciltestpass = SSABool::compare_uge(StencilGetSingle(), stencilTestValue);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
stenciltestpass = StencilGetSingle() == stencilTestValue;
|
||||||
|
}
|
||||||
|
|
||||||
|
SSAIfBlock branch_stenciltestpass;
|
||||||
|
branch_stenciltestpass.if_block(stenciltestpass);
|
||||||
|
{
|
||||||
|
LoopPartialBlock(true);
|
||||||
|
}
|
||||||
|
branch_stenciltestpass.end_block();
|
||||||
|
}
|
||||||
|
branch_covered_stencil.else_block();
|
||||||
|
{
|
||||||
|
LoopPartialBlock(false);
|
||||||
|
}
|
||||||
|
branch_covered_stencil.end_block();
|
||||||
}
|
}
|
||||||
branch_covered.end_block();
|
branch_covered.end_block();
|
||||||
|
|
||||||
|
@ -422,10 +446,26 @@ void DrawTriangleCodegen::LoopFullBlock()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::LoopPartialBlock()
|
void DrawTriangleCodegen::LoopPartialBlock(bool isSingleStencilValue)
|
||||||
{
|
{
|
||||||
int pixelsize = truecolor ? 4 : 1;
|
int pixelsize = truecolor ? 4 : 1;
|
||||||
|
|
||||||
|
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||||
|
{
|
||||||
|
if (isSingleStencilValue)
|
||||||
|
{
|
||||||
|
SSAInt stencilMask = StencilBlockMask.load(false);
|
||||||
|
SSAUByte val0 = stencilMask.trunc_ubyte();
|
||||||
|
for (int i = 0; i < 8 * 8; i++)
|
||||||
|
StencilBlock[i].store(val0);
|
||||||
|
StencilBlockMask.store(SSAInt(0));
|
||||||
|
}
|
||||||
|
|
||||||
|
SSAUByte lastStencilValue = StencilBlock[0].load(false);
|
||||||
|
stack_stencilblock_restored.store(SSABool(true));
|
||||||
|
stack_stencilblock_lastval.store(lastStencilValue);
|
||||||
|
}
|
||||||
|
|
||||||
stack_CY1.store(C1 + DX12 * y0 - DY12 * x0);
|
stack_CY1.store(C1 + DX12 * y0 - DY12 * x0);
|
||||||
stack_CY2.store(C2 + DX23 * y0 - DY23 * x0);
|
stack_CY2.store(C2 + DX23 * y0 - DY23 * x0);
|
||||||
stack_CY3.store(C3 + DX31 * y0 - DY31 * x0);
|
stack_CY3.store(C3 + DX31 * y0 - DY31 * x0);
|
||||||
|
@ -461,6 +501,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
stack_ix.store(SSAInt(0));
|
stack_ix.store(SSAInt(0));
|
||||||
|
|
||||||
SSAForBlock loopx;
|
SSAForBlock loopx;
|
||||||
|
SSABool stencilblock_restored;
|
||||||
|
SSAUByte lastStencilValue;
|
||||||
|
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||||
|
{
|
||||||
|
stencilblock_restored = stack_stencilblock_restored.load();
|
||||||
|
lastStencilValue = stack_stencilblock_lastval.load();
|
||||||
|
}
|
||||||
SSAInt ix = stack_ix.load();
|
SSAInt ix = stack_ix.load();
|
||||||
SSAInt CX1 = stack_CX1.load();
|
SSAInt CX1 = stack_CX1.load();
|
||||||
SSAInt CX2 = stack_CX2.load();
|
SSAInt CX2 = stack_CX2.load();
|
||||||
|
@ -472,17 +519,26 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
SSABool visible = (ix + x < clipright) && (iy + y < clipbottom);
|
SSABool visible = (ix + x < clipright) && (iy + y < clipbottom);
|
||||||
SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible;
|
SSABool covered = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0) && visible;
|
||||||
|
|
||||||
|
if (!isSingleStencilValue)
|
||||||
|
{
|
||||||
|
SSAUByte stencilValue = StencilBlock[ix + iy * 8].load(false);
|
||||||
|
|
||||||
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||||
{
|
{
|
||||||
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue) && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||||
}
|
}
|
||||||
else if (variant == TriDrawVariant::StencilClose)
|
else if (variant == TriDrawVariant::StencilClose)
|
||||||
{
|
{
|
||||||
covered = covered && SSABool::compare_uge(StencilGet(ix, iy), stencilTestValue);
|
covered = covered && SSABool::compare_uge(stencilValue, stencilTestValue);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
covered = covered && StencilGet(ix, iy) == stencilTestValue;
|
covered = covered && stencilValue == stencilTestValue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||||
|
{
|
||||||
|
covered = covered && subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||||
}
|
}
|
||||||
|
|
||||||
SSAIfBlock branch;
|
SSAIfBlock branch;
|
||||||
|
@ -490,11 +546,11 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
{
|
{
|
||||||
if (variant == TriDrawVariant::Stencil)
|
if (variant == TriDrawVariant::Stencil)
|
||||||
{
|
{
|
||||||
StencilSet(ix, iy, stencilWriteValue);
|
StencilBlock[ix + iy * 8].store(stencilWriteValue);
|
||||||
}
|
}
|
||||||
else if (variant == TriDrawVariant::StencilClose)
|
else if (variant == TriDrawVariant::StencilClose)
|
||||||
{
|
{
|
||||||
StencilSet(ix, iy, stencilWriteValue);
|
StencilBlock[ix + iy * 8].store(stencilWriteValue);
|
||||||
subsectorbuffer[ix].store(subsectorDepth);
|
subsectorbuffer[ix].store(subsectorDepth);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
|
@ -518,6 +574,13 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
}
|
}
|
||||||
branch.end_block();
|
branch.end_block();
|
||||||
|
|
||||||
|
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||||
|
{
|
||||||
|
SSAUByte newStencilValue = StencilBlock[ix + iy * 8].load(false);
|
||||||
|
stack_stencilblock_restored.store(stencilblock_restored && newStencilValue == lastStencilValue);
|
||||||
|
stack_stencilblock_lastval.store(newStencilValue);
|
||||||
|
}
|
||||||
|
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]);
|
stack_AffineVaryingPosX[i].store(AffineVaryingPosX[i] + AffineVaryingStepX[i]);
|
||||||
|
|
||||||
|
@ -539,6 +602,18 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
stack_iy.store(iy + 1);
|
stack_iy.store(iy + 1);
|
||||||
}
|
}
|
||||||
loopy.end_block();
|
loopy.end_block();
|
||||||
|
|
||||||
|
if (variant == TriDrawVariant::Stencil || variant == TriDrawVariant::StencilClose)
|
||||||
|
{
|
||||||
|
SSAIfBlock branch;
|
||||||
|
SSABool restored = stack_stencilblock_restored.load();
|
||||||
|
branch.if_block(restored);
|
||||||
|
{
|
||||||
|
SSAUByte lastStencilValue = stack_stencilblock_lastval.load();
|
||||||
|
StencilClear(lastStencilValue);
|
||||||
|
}
|
||||||
|
branch.end_block();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
@ -891,33 +966,6 @@ void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
|
||||||
StencilBlockMask = stencilMasks[block];
|
StencilBlockMask = stencilMasks[block];
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value)
|
|
||||||
{
|
|
||||||
SSAInt mask = StencilBlockMask.load(false);
|
|
||||||
|
|
||||||
SSAIfBlock branchNeedsUpdate;
|
|
||||||
branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int())));
|
|
||||||
|
|
||||||
SSAIfBlock branchFirstSet;
|
|
||||||
branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00));
|
|
||||||
{
|
|
||||||
SSAUByte val0 = mask.trunc_ubyte();
|
|
||||||
for (int i = 0; i < 8 * 8; i++)
|
|
||||||
StencilBlock[i].store(val0);
|
|
||||||
StencilBlockMask.store(SSAInt(0));
|
|
||||||
}
|
|
||||||
branchFirstSet.end_block();
|
|
||||||
|
|
||||||
StencilBlock[x + y * 8].store(value);
|
|
||||||
|
|
||||||
branchNeedsUpdate.end_block();
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y)
|
|
||||||
{
|
|
||||||
return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false));
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAUByte DrawTriangleCodegen::StencilGetSingle()
|
SSAUByte DrawTriangleCodegen::StencilGetSingle()
|
||||||
{
|
{
|
||||||
return StencilBlockMask.load(false).trunc_ubyte();
|
return StencilBlockMask.load(false).trunc_ubyte();
|
||||||
|
|
|
@ -44,7 +44,7 @@ private:
|
||||||
void LoopBlockY();
|
void LoopBlockY();
|
||||||
void LoopBlockX();
|
void LoopBlockX();
|
||||||
void LoopFullBlock();
|
void LoopFullBlock();
|
||||||
void LoopPartialBlock();
|
void LoopPartialBlock(bool isSingleStencilValue);
|
||||||
void SetupAffineBlock();
|
void SetupAffineBlock();
|
||||||
|
|
||||||
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
||||||
|
@ -59,9 +59,7 @@ private:
|
||||||
SSAInt ToPal8(SSAVec4i c);
|
SSAInt ToPal8(SSAVec4i c);
|
||||||
|
|
||||||
void SetStencilBlock(SSAInt block);
|
void SetStencilBlock(SSAInt block);
|
||||||
void StencilSet(SSAInt x, SSAInt y, SSAUByte value);
|
|
||||||
void StencilClear(SSAUByte value);
|
void StencilClear(SSAUByte value);
|
||||||
SSAUByte StencilGet(SSAInt x, SSAInt y);
|
|
||||||
SSAUByte StencilGetSingle();
|
SSAUByte StencilGetSingle();
|
||||||
SSABool StencilIsSingleValue();
|
SSABool StencilIsSingleValue();
|
||||||
|
|
||||||
|
@ -87,6 +85,8 @@ private:
|
||||||
SSAStack<SSAFloat> stack_AffineW;
|
SSAStack<SSAFloat> stack_AffineW;
|
||||||
SSAStack<SSAFloat> stack_AffineVaryingPosY[TriVertex::NumVarying];
|
SSAStack<SSAFloat> stack_AffineVaryingPosY[TriVertex::NumVarying];
|
||||||
SSAStack<SSAInt> stack_AffineVaryingPosX[TriVertex::NumVarying];
|
SSAStack<SSAInt> stack_AffineVaryingPosX[TriVertex::NumVarying];
|
||||||
|
SSAStack<SSABool> stack_stencilblock_restored;
|
||||||
|
SSAStack<SSAUByte> stack_stencilblock_lastval;
|
||||||
|
|
||||||
SSAUBytePtr dest;
|
SSAUBytePtr dest;
|
||||||
SSAInt pitch;
|
SSAInt pitch;
|
||||||
|
|
|
@ -31,12 +31,13 @@ SSABool::SSABool()
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
SSABool::SSABool(bool constant)
|
SSABool::SSABool(bool constant)
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
|
v = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, constant, false));
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
SSABool::SSABool(llvm::Value *v)
|
SSABool::SSABool(llvm::Value *v)
|
||||||
: v(v)
|
: v(v)
|
||||||
{
|
{
|
||||||
|
|
|
@ -35,7 +35,7 @@ class SSABool
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
SSABool();
|
SSABool();
|
||||||
//SSABool(bool constant);
|
explicit SSABool(bool constant);
|
||||||
explicit SSABool(llvm::Value *v);
|
explicit SSABool(llvm::Value *v);
|
||||||
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
|
static SSABool from_llvm(llvm::Value *v) { return SSABool(v); }
|
||||||
static llvm::Type *llvm_type();
|
static llvm::Type *llvm_type();
|
||||||
|
|
Loading…
Reference in a new issue