From e3dc9c93b91e43229676c24800fef8ff5a020c17 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Sat, 12 Nov 2016 13:50:28 +0100 Subject: [PATCH] Use a simpler algorithm for the stencil buffer as the old one was too slow --- .../fixedfunction/drawtrianglecodegen.cpp | 52 ++++--------------- src/r_compiler/ssa/ssa_int.cpp | 6 +++ src/r_compiler/ssa/ssa_int.h | 3 ++ src/r_poly.cpp | 3 ++ src/r_poly_triangle.cpp | 13 ++++- src/r_poly_triangle.h | 52 ++++--------------- 6 files changed, 44 insertions(+), 85 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp index 5c8b9a736..4819a28cf 100644 --- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp +++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp @@ -520,75 +520,41 @@ void DrawTriangleCodegen::StencilSet(SSAInt x, SSAInt y, SSAUByte value) SSAInt mask = StencilBlockMask.load(false); SSAIfBlock branchNeedsUpdate; - branchNeedsUpdate.if_block(!(mask == SSAInt(0xffffffff) && StencilBlock[0].load(false) == value)); + branchNeedsUpdate.if_block(!(mask == (SSAInt(0xffffff00) | value.zext_int()))); SSAIfBlock branchFirstSet; - branchFirstSet.if_block(mask == SSAInt(0xffffffff)); + branchFirstSet.if_block((mask & SSAInt(0xffffff00)) == SSAInt(0xffffff00)); { - SSAUByte val0 = StencilBlock[0].load(false); - for (int i = 1; i < 8 * 8; i++) + SSAUByte val0 = mask.trunc_ubyte(); + for (int i = 0; i < 8 * 8; i++) StencilBlock[i].store(val0); + StencilBlockMask.store(SSAInt(0)); } branchFirstSet.end_block(); - SSAIfBlock branchNeedsUpdate2; - branchNeedsUpdate2.if_block(!(StencilBlock[x + y * 8].load(false) == value)); - StencilBlock[x + y * 8].store(value); - SSAInt leveloffset = SSAInt(0); - for (int i = 1; i < 4; i++) - { - x = x >> 1; - y = y >> 1; - - SSABool differs = - !(StencilBlock[(x << i) + (y << i) * 8].load(false) == value && - StencilBlock[((x + 1) << i) + (y << i) * 8].load(false) == value && - StencilBlock[(x << i) + ((y + 1) << i) * 8].load(false) == value && - StencilBlock[((x + 1) << i) + ((y + 1) << i) * 8].load(false) == value); - - SSAInt levelbit = SSAInt(1) << (leveloffset + x + y * (8 >> i)); - - mask = differs.select(mask & ~levelbit, mask | levelbit); - - leveloffset = leveloffset + (SSAInt(8) >> leveloffset) * (SSAInt(8) >> leveloffset); - } - - SSABool differs = - !(StencilBlock[0].load(false) == value && - StencilBlock[4].load(false) == value && - StencilBlock[4 * 8].load(false) == value && - StencilBlock[4 * 8 + 4].load(false) == value); - - mask = differs.select(mask & ~(1 << 22), mask | (1 << 22)); - - StencilBlockMask.store(mask); - - branchNeedsUpdate2.end_block(); branchNeedsUpdate.end_block(); } SSAUByte DrawTriangleCodegen::StencilGet(SSAInt x, SSAInt y) { - SSABool oneValueBlock = StencilBlockMask.load(false) == SSAInt(0xffffffff); - return oneValueBlock.select(StencilBlock[0].load(false), StencilBlock[x + y * 8].load(false)); + return StencilIsSingleValue().select(StencilBlockMask.load(false).trunc_ubyte(), StencilBlock[x + y * 8].load(false)); } SSAUByte DrawTriangleCodegen::StencilGetSingle() { - return StencilBlock[0].load(false); + return StencilBlockMask.load(false).trunc_ubyte(); } void DrawTriangleCodegen::StencilClear(SSAUByte value) { - StencilBlock[0].store(value); - StencilBlockMask.store(SSAInt(0xffffffff)); + StencilBlockMask.store(SSAInt(0xffffff00) | value.zext_int()); } SSABool DrawTriangleCodegen::StencilIsSingleValue() { - return StencilBlockMask.load(false) == SSAInt(0xffffffff); + return (StencilBlockMask.load(false) & SSAInt(0xffffff00)) == SSAInt(0xffffff00); } void DrawTriangleCodegen::LoadArgs(TriDrawVariant variant, bool truecolor, SSAValue args, SSAValue thread_data) diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 01ad92e3f..b0cd26821 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -23,6 +23,7 @@ #include "r_compiler/llvm_include.h" #include "ssa_int.h" #include "ssa_float.h" +#include "ssa_ubyte.h" #include "ssa_bool.h" #include "ssa_scope.h" @@ -81,6 +82,11 @@ SSAInt SSAInt::ashr(int bits) return SSAInt::from_llvm(SSAScope::builder().CreateAShr(v, bits, SSAScope::hint())); } +SSAUByte SSAInt::trunc_ubyte() +{ + return SSAUByte::from_llvm(SSAScope::builder().CreateTrunc(v, SSAUByte::llvm_type(), SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index ae6e6074f..dab8adcb9 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -26,6 +26,7 @@ namespace llvm { class Value; } namespace llvm { class Type; } class SSAFloat; +class SSAUByte; class SSAInt { @@ -44,6 +45,8 @@ public: SSAInt add(SSAInt b, bool no_unsigned_wrap, bool no_signed_wrap); SSAInt ashr(int bits); + SSAUByte trunc_ubyte(); + llvm::Value *v; }; diff --git a/src/r_poly.cpp b/src/r_poly.cpp index 403adeec0..a4141b94c 100644 --- a/src/r_poly.cpp +++ b/src/r_poly.cpp @@ -41,6 +41,9 @@ CVAR(Bool, r_debug_cull, 0, 0) void RenderPolyBsp::Render() { + if (!r_swtruecolor) // Disable pal rendering for now + return; + // Setup working buffers PolyVertexBuffer::Clear(); SolidSegments.clear(); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 2b8c166dc..3d444d2ad 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -55,14 +55,25 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian auto llvm = LLVMDrawers::Instance(); void(*drawfunc)(const TriDrawTriangleArgs *, WorkerThreadData *); +#if 1 switch (variant) { default: case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? llvm->TriDraw32: llvm->TriDraw8; break; case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? llvm->TriFill32 : llvm->TriFill8; break; case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? llvm->TriDrawSubsector32 : llvm->TriDrawSubsector8; break; - case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil/*llvm->TriStencil*/; break; + case TriDrawVariant::Stencil: drawfunc = llvm->TriStencil; break; } +#else + switch (variant) + { + default: + case TriDrawVariant::Draw: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::draw32 : ScreenPolyTriangleDrawer::draw; break; + case TriDrawVariant::Fill: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::fill32 : ScreenPolyTriangleDrawer::fill; break; + case TriDrawVariant::DrawSubsector: drawfunc = r_swtruecolor ? ScreenPolyTriangleDrawer::drawsubsector32 : llvm->TriDrawSubsector8; break; + case TriDrawVariant::Stencil: drawfunc = ScreenPolyTriangleDrawer::stencil; break; + } +#endif TriDrawTriangleArgs args; args.dest = dc_destorg; diff --git a/src/r_poly_triangle.h b/src/r_poly_triangle.h index a21ff1d6d..f3f63f8f1 100644 --- a/src/r_poly_triangle.h +++ b/src/r_poly_triangle.h @@ -85,70 +85,40 @@ public: void Set(int x, int y, uint8_t value) { - if (ValueMask == 0xffffffff) + if ((ValueMask & 0xffffff00) == 0xffffff00) { - if (Values[0] == value) + if ((ValueMask & 0xff) == value) return; - for (int i = 1; i < 8 * 8; i++) - Values[i] = Values[0]; + for (int i = 0; i < 8 * 8; i++) + Values[i] = (ValueMask & 0xff); + ValueMask = 0; } - if (Values[x + y * 8] == value) - return; - Values[x + y * 8] = value; - - int leveloffset = 0; - for (int i = 1; i < 4; i++) - { - x >>= 1; - y >>= 1; - - bool differs = - Values[(x << i) + (y << i) * 8] != value || - Values[((x + 1) << i) + (y << i) * 8] != value || - Values[(x << i) + ((y + 1) << i) * 8] != value || - Values[((x + 1) << i) + ((y + 1) << i) * 8] != value; - - int levelbit = 1 << (leveloffset + x + y * (8 >> i)); - - if (differs) - ValueMask = ValueMask & ~levelbit; - else - ValueMask = ValueMask | levelbit; - - leveloffset += (8 >> leveloffset) * (8 >> leveloffset); - } - - if (Values[0] != value || Values[4] != value || Values[4 * 8] != value || Values[4 * 8 + 4] != value) - ValueMask = ValueMask & ~(1 << 22); - else - ValueMask = ValueMask | (1 << 22); } uint8_t Get(int x, int y) const { - if (ValueMask == 0xffffffff) - return Values[0]; + if (IsSingleValue()) + return ValueMask & 0xff; else return Values[x + y * 8]; } void Clear(uint8_t value) { - Values[0] = value; - ValueMask = 0xffffffff; + ValueMask = 0xffffff00 | (uint32_t)value; } bool IsSingleValue() const { - return ValueMask == 0xffffffff; + return (ValueMask & 0xffffff00) == 0xffffff00; } private: - uint8_t *Values; // [8 * 8]; - uint32_t &ValueMask; // 4 * 4 + 2 * 2 + 1 bits indicating is Values are the same + uint8_t *Values; + uint32_t &ValueMask; }; class PolySubsectorGBuffer