mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 15:21:51 +00:00
Masked store experiment (which turned out to be much slower)
This commit is contained in:
parent
c89e023bd5
commit
e3b3b7a4ce
7 changed files with 206 additions and 4 deletions
|
@ -541,6 +541,131 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
|||
loopy.end_block();
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Experimental variant of the block loop: walks a q x q block in groups of
// four horizontal pixels, builds a 4-entry coverage mask per group, and writes
// the processed pixels (and the subsector buffer) with masked vector stores.
// In this listing the function sits inside an `#if 0` region, so it is not compiled.
void DrawTriangleCodegen::LoopMaskedStoreBlock()
|
||||
{
|
||||
// The Stencil and StencilClose variants have empty bodies here: nothing is emitted for them.
if (variant == TriDrawVariant::Stencil)
|
||||
{
|
||||
}
|
||||
else if (variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
}
|
||||
else
|
||||
{
|
||||
// Multiplier applied to pixel offsets when indexing dest: 4 when truecolor, otherwise 1.
int pixelsize = truecolor ? 4 : 1;
|
||||
|
||||
// Seed the row-start W and varyings from posx_w / posx_varying.
AffineW = posx_w;
|
||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||
AffineVaryingPosY[i] = posx_varying[i];
|
||||
|
||||
// Row-start values of the three terms that the coverage test below requires to be > 0.
// NOTE(review): presumably the three triangle edge (half-space) functions evaluated at (x0, y0) - confirm.
SSAInt CY1 = C1 + DX12 * y0 - DY12 * x0;
|
||||
SSAInt CY2 = C2 + DX23 * y0 - DY23 * x0;
|
||||
SSAInt CY3 = C3 + DX31 * y0 - DY31 * x0;
|
||||
|
||||
for (int iy = 0; iy < q; iy++)
|
||||
{
|
||||
// Pointers to the start of row iy in the destination and subsector buffers.
SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize];
|
||||
SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch];
|
||||
|
||||
// NOTE(review): presumably (re)initialises AffineVaryingPosX / AffineVaryingStepX for this row - confirm in SetupAffineBlock.
SetupAffineBlock();
|
||||
|
||||
// Per-pixel copies of the row-start terms; stepped once per pixel in the mask loop.
SSAInt CX1 = CY1;
|
||||
SSAInt CX2 = CY2;
|
||||
SSAInt CX3 = CY3;
|
||||
|
||||
for (int ix = 0; ix < q; ix += 4)
|
||||
{
|
||||
// One coverage flag per pixel of the current group of four.
SSABool covered[4];
|
||||
for (int maskindex = 0; maskindex < 4; maskindex++)
|
||||
{
|
||||
// Base test: all three terms must be strictly positive.
covered[maskindex] = CX1 > SSAInt(0) && CX2 > SSAInt(0) && CX3 > SSAInt(0);
|
||||
|
||||
// Subsector variants additionally require stencil >= stencilTestValue (unsigned compare)
// and a stored subsector value >= subsectorDepth.
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||
{
|
||||
auto xx = SSAInt(ix + maskindex);
|
||||
auto yy = SSAInt(iy);
|
||||
covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue) && subsectorbuffer[ix + maskindex].load(true) >= subsectorDepth;
|
||||
}
|
||||
// NOTE(review): the enclosing else already excluded StencilClose, so this branch looks unreachable - confirm.
else if (variant == TriDrawVariant::StencilClose)
|
||||
{
|
||||
auto xx = SSAInt(ix + maskindex);
|
||||
auto yy = SSAInt(iy);
|
||||
covered[maskindex] = covered[maskindex] && SSABool::compare_uge(StencilGet(xx, yy), stencilTestValue);
|
||||
}
|
||||
// All remaining variants require the stencil value to equal stencilTestValue.
else
|
||||
{
|
||||
auto xx = SSAInt(ix + maskindex);
|
||||
auto yy = SSAInt(iy);
|
||||
covered[maskindex] = covered[maskindex] && StencilGet(xx, yy) == stencilTestValue;
|
||||
}
|
||||
|
||||
// Step the three terms to the next pixel in the row.
CX1 = CX1 - FDY12;
|
||||
CX2 = CX2 - FDY23;
|
||||
CX3 = CX3 - FDY31;
|
||||
}
|
||||
|
||||
// Destination pointer for the first pixel of this group.
SSAUBytePtr buf = buffer[ix * pixelsize];
|
||||
if (truecolor)
|
||||
{
|
||||
// Load 16 bytes and widen them into four SSAVec4i values, one per pixels[] entry.
SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false);
|
||||
SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16);
|
||||
SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16);
|
||||
SSAVec4i pixels[4] =
|
||||
{
|
||||
SSAVec4i::extendlo(pixels8lo),
|
||||
SSAVec4i::extendhi(pixels8lo),
|
||||
SSAVec4i::extendlo(pixels8hi),
|
||||
SSAVec4i::extendhi(pixels8hi)
|
||||
};
|
||||
|
||||
// Process every pixel of the group (covered or not) and advance the X varyings after each one.
for (int sse = 0; sse < 4; sse++)
|
||||
{
|
||||
pixels[sse] = ProcessPixel32(pixels[sse], AffineVaryingPosX);
|
||||
|
||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||
AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i];
|
||||
}
|
||||
|
||||
// Repack to 16 bytes and hand them to the masked store together with the covered[] flags.
buf.store_masked_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])), covered);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Non-truecolor path: load four bytes and split them into four scalar SSAInt values.
SSAVec4i pixelsvec = buf.load_vec4ub(false);
|
||||
SSAInt pixels[4] =
|
||||
{
|
||||
pixelsvec[0],
|
||||
pixelsvec[1],
|
||||
pixelsvec[2],
|
||||
pixelsvec[3]
|
||||
};
|
||||
|
||||
// Same per-pixel processing and varying step as the truecolor path, using ProcessPixel8.
for (int sse = 0; sse < 4; sse++)
|
||||
{
|
||||
pixels[sse] = ProcessPixel8(pixels[sse], AffineVaryingPosX);
|
||||
|
||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||
AffineVaryingPosX[i] = AffineVaryingPosX[i] + AffineVaryingStepX[i];
|
||||
}
|
||||
|
||||
buf.store_masked_vec4ub(SSAVec4i(pixels[0], pixels[1], pixels[2], pixels[3]), covered);
|
||||
}
|
||||
|
||||
// For the non-subsector variants, also pass subsectorDepth and the covered[] flags to the subsector buffer's masked store.
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
||||
subsectorbuffer[ix].store_masked_vec4i(SSAVec4i(subsectorDepth), covered);
|
||||
}
|
||||
|
||||
// Advance W, the varyings and the three coverage terms to the next row.
AffineW = AffineW + gradWY;
|
||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||
AffineVaryingPosY[i] = AffineVaryingPosY[i] + gradVaryingY[i];
|
||||
|
||||
CY1 = CY1 + FDX12;
|
||||
CY2 = CY2 + FDX23;
|
||||
CY3 = CY3 + FDX31;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt uvoffset)
|
||||
{
|
||||
if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||
|
|
|
@ -86,6 +86,6 @@ void SSAFloatPtr::store_vec4f(const SSAVec4f &new_value)
|
|||
// Emits an aligned-store instruction that writes new_value through this pointer,
// after bitcasting the pointer to a pointer to a 4 x float vector.
// NOTE(review): this listing is a rendered diff that lost its +/- markers. The two
// `auto inst` lines below are presumably the removed (alignment 1) and the added
// (alignment 4) version of the same statement; only one of them exists in the real file.
void SSAFloatPtr::store_unaligned_vec4f(const SSAVec4f &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xfloattypeptr = llvm::VectorType::get(llvm::Type::getFloatTy(SSAScope::context()), 4)->getPointerTo();
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 1);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xfloattypeptr, SSAScope::hint()), 4);
|
||||
// Tag the store with the scope list returned by SSAScope::constant_scope_list() as noalias metadata.
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "precomp.h"
|
||||
#include "ssa_int_ptr.h"
|
||||
#include "ssa_scope.h"
|
||||
#include "ssa_bool.h"
|
||||
|
||||
SSAIntPtr::SSAIntPtr()
|
||||
: v(0)
|
||||
|
@ -86,6 +87,25 @@ void SSAIntPtr::store_vec4i(const SSAVec4i &new_value)
|
|||
// Emits an aligned-store instruction that writes new_value through this pointer,
// after bitcasting the pointer to a pointer to a 4 x i32 vector.
// NOTE(review): this listing is a rendered diff that lost its +/- markers. The two
// `auto inst` lines below are presumably the removed (alignment 1) and the added
// (alignment 4) version of the same statement; only one of them exists in the real file.
void SSAIntPtr::store_unaligned_vec4i(const SSAVec4i &new_value)
|
||||
{
|
||||
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 4);
|
||||
// Tag the store with the scope list returned by SSAScope::constant_scope_list() as noalias metadata.
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
// Emits an LLVM masked store of new_value (4 x i32) through this pointer.
//   new_value - the vector to store.
//   mask      - four SSABool values; mask[i] is inserted as element i of the
//               4 x i1 mask vector handed to CreateMaskedStore.
void SSAIntPtr::store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4])
|
||||
{
|
||||
// Create mask vector
|
||||
// Start from a constant vector of four 1-bit zeros, then overwrite each element with the runtime mask value.
std::vector<llvm::Constant*> maskconstants;
|
||||
maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
|
||||
llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants);
|
||||
// For LLVM older than 3.9 the element index is passed as an llvm::Value; otherwise as a uint64_t.
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
for (int i = 0; i < 4; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint());
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint());
|
||||
#endif
|
||||
|
||||
// Bitcast the pointer to a 4 x i32 vector pointer and emit the masked store; the literal 1 is the alignment argument.
llvm::PointerType *m4xint32typeptr = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
auto inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m4xint32typeptr, SSAScope::hint()), 1, maskValue);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
namespace llvm { class Value; }
|
||||
namespace llvm { class Type; }
|
||||
|
||||
class SSABool;
|
||||
|
||||
class SSAIntPtr
|
||||
{
|
||||
public:
|
||||
|
@ -44,6 +46,7 @@ public:
|
|||
void store(const SSAInt &new_value);
|
||||
void store_vec4i(const SSAVec4i &new_value);
|
||||
void store_unaligned_vec4i(const SSAVec4i &new_value);
|
||||
void store_masked_vec4i(const SSAVec4i &new_value, SSABool mask[4]);
|
||||
|
||||
llvm::Value *v;
|
||||
};
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "precomp.h"
|
||||
#include "ssa_ubyte_ptr.h"
|
||||
#include "ssa_scope.h"
|
||||
#include "ssa_bool.h"
|
||||
|
||||
SSAUBytePtr::SSAUBytePtr()
|
||||
: v(0)
|
||||
|
@ -104,6 +105,37 @@ void SSAUBytePtr::store_vec4ub(const SSAVec4i &new_value)
|
|||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
// Emits an LLVM masked store of four bytes through this pointer.
//   new_value - four 32-bit values that are first packed into a 16-byte vector;
//               the first four bytes of that vector are what gets stored.
//   mask      - four SSABool values; mask[i] is inserted as element i of the
//               4 x i1 mask vector handed to CreateMaskedStore.
void SSAUBytePtr::store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4])
|
||||
{
|
||||
// Store using saturate:
|
||||
// Pack new_value down to 16-bit and then 8-bit elements by constructing SSAVec8s / SSAVec16ub from it.
SSAVec8s v8s(new_value, new_value);
|
||||
SSAVec16ub v16ub(v8s, v8s);
|
||||
|
||||
// Create mask vector
|
||||
// Start from a constant vector of four 1-bit zeros, then overwrite each element with the runtime mask value.
std::vector<llvm::Constant*> maskconstants;
|
||||
maskconstants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
|
||||
llvm::Value *maskValue = llvm::ConstantVector::get(maskconstants);
|
||||
// For LLVM older than 3.9 the element index is passed as an llvm::Value; otherwise as a uint64_t.
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
for (int i = 0; i < 4; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, SSAInt(i).v, SSAScope::hint());
|
||||
#else
|
||||
for (int i = 0; i < 4; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i].v, (uint64_t)i, SSAScope::hint());
|
||||
#endif
|
||||
|
||||
llvm::Type *m16xint8type = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16);
|
||||
llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
|
||||
// Shuffle indices 0..3: select the first four elements of the 16-byte vector as the 4 x i8 value to store.
std::vector<llvm::Constant*> constants;
|
||||
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0)));
|
||||
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 1)));
|
||||
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 2)));
|
||||
constants.push_back(llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 3)));
|
||||
llvm::Value *shufflemask = llvm::ConstantVector::get(constants);
|
||||
llvm::Value *val_vector = SSAScope::builder().CreateShuffleVector(v16ub.v, llvm::UndefValue::get(m16xint8type), shufflemask, SSAScope::hint());
|
||||
// Bitcast the pointer to a 4 x i8 vector pointer and emit the masked store; the literal 1 is the alignment argument.
llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(val_vector, SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), 1, maskValue);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
|
||||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
|
@ -118,6 +150,24 @@ void SSAUBytePtr::store_vec16ub(const SSAVec16ub &new_value)
|
|||
// Emits an aligned-store instruction that writes new_value through this pointer,
// after bitcasting the pointer to a pointer to a 16 x i8 vector.
// NOTE(review): this listing is a rendered diff that lost its +/- markers. The two
// `llvm::StoreInst *inst` lines below are presumably the removed (alignment 1) and the
// added (alignment 4) version of the same statement; only one of them exists in the real file.
void SSAUBytePtr::store_unaligned_vec16ub(const SSAVec16ub &new_value)
|
||||
{
|
||||
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1);
|
||||
llvm::StoreInst *inst = SSAScope::builder().CreateAlignedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 4);
|
||||
// Tag the store with the scope list returned by SSAScope::constant_scope_list() as noalias metadata.
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
||||
// Emits an LLVM masked store of new_value (16 x i8) through this pointer.
//   new_value - the 16-byte vector to store.
//   mask      - four SSABool values; each one is replicated into four consecutive
//               elements of the 16 x i1 mask vector (element i uses mask[i / 4]).
void SSAUBytePtr::store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4])
|
||||
{
|
||||
// Start from a constant vector of sixteen 1-bit zeros, then overwrite each element with the runtime mask value.
std::vector<llvm::Constant*> constants;
|
||||
constants.resize(16, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(1, 0, false)));
|
||||
llvm::Value *maskValue = llvm::ConstantVector::get(constants);
|
||||
// For LLVM older than 3.9 the element index is passed as an llvm::Value; otherwise as a uint64_t.
#if LLVM_VERSION_MAJOR < 3 || (LLVM_VERSION_MAJOR == 3 && LLVM_VERSION_MINOR < 9)
|
||||
for (int i = 0; i < 16; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, SSAInt(i).v, SSAScope::hint());
|
||||
#else
|
||||
for (int i = 0; i < 16; i++)
|
||||
maskValue = SSAScope::builder().CreateInsertElement(maskValue, mask[i / 4].v, (uint64_t)i, SSAScope::hint());
|
||||
#endif
|
||||
|
||||
// Bitcast the pointer to a 16 x i8 vector pointer and emit the masked store; the literal 1 is the alignment argument.
llvm::PointerType *m16xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 16)->getPointerTo();
|
||||
llvm::CallInst *inst = SSAScope::builder().CreateMaskedStore(new_value.v, SSAScope::builder().CreateBitCast(v, m16xint8typeptr, SSAScope::hint()), 1, maskValue);
|
||||
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
|
@ -31,6 +31,8 @@
|
|||
namespace llvm { class Value; }
|
||||
namespace llvm { class Type; }
|
||||
|
||||
class SSABool;
|
||||
|
||||
class SSAUBytePtr
|
||||
{
|
||||
public:
|
||||
|
@ -46,8 +48,10 @@ public:
|
|||
SSAVec16ub load_unaligned_vec16ub(bool constantScopeDomain) const;
|
||||
void store(const SSAUByte &new_value);
|
||||
void store_vec4ub(const SSAVec4i &new_value);
|
||||
void store_masked_vec4ub(const SSAVec4i &new_value, SSABool mask[4]);
|
||||
void store_vec16ub(const SSAVec16ub &new_value);
|
||||
void store_unaligned_vec16ub(const SSAVec16ub &new_value);
|
||||
void store_masked_vec16ub(const SSAVec16ub &new_value, SSABool mask[4]);
|
||||
|
||||
llvm::Value *v;
|
||||
};
|
||||
|
|
|
@ -68,6 +68,6 @@ void SSAVec4fPtr::store(const SSAVec4f &new_value)
|
|||
|
||||
// Emits an aligned-store instruction that writes new_value through this pointer
// (no bitcast; the final `false` argument is passed to CreateAlignedStore as-is).
// NOTE(review): this listing is a rendered diff that lost its +/- markers. The two
// `auto inst` lines below are presumably the removed (alignment 1) and the added
// (alignment 4) version of the same statement; only one of them exists in the real file.
void SSAVec4fPtr::store_unaligned(const SSAVec4f &new_value)
|
||||
{
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 1, false);
|
||||
auto inst = SSAScope::builder().CreateAlignedStore(new_value.v, v, 4, false);
|
||||
// Tag the store with the scope list returned by SSAScope::constant_scope_list() as noalias metadata.
inst->setMetadata(llvm::LLVMContext::MD_noalias, SSAScope::constant_scope_list());
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue