mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-29 15:32:54 +00:00
Change triangle full block to use vectored load and store
This commit is contained in:
parent
08c4f2ac18
commit
845bf93c95
5 changed files with 152 additions and 104 deletions
|
@ -303,57 +303,80 @@ void DrawTriangleCodegen::LoopFullBlock()
|
||||||
{
|
{
|
||||||
int pixelsize = truecolor ? 4 : 1;
|
int pixelsize = truecolor ? 4 : 1;
|
||||||
|
|
||||||
stack_iy.store(SSAInt(0));
|
for (int iy = 0; iy < q; iy++)
|
||||||
stack_buffer.store(dest[x * pixelsize]);
|
|
||||||
stack_subsectorbuffer.store(subsectorGBuffer[x]);
|
|
||||||
|
|
||||||
SSAForBlock loopy;
|
|
||||||
SSAInt iy = stack_iy.load();
|
|
||||||
SSAUBytePtr buffer = stack_buffer.load();
|
|
||||||
SSAIntPtr subsectorbuffer = stack_subsectorbuffer.load();
|
|
||||||
loopy.loop_block(iy < SSAInt(q), q);
|
|
||||||
{
|
{
|
||||||
|
SSAUBytePtr buffer = dest[(x + iy * pitch) * pixelsize];
|
||||||
|
SSAIntPtr subsectorbuffer = subsectorGBuffer[x + iy * pitch];
|
||||||
|
|
||||||
|
SSAInt varying[TriVertex::NumVarying];
|
||||||
SSAInt varyingStep[TriVertex::NumVarying];
|
SSAInt varyingStep[TriVertex::NumVarying];
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
{
|
{
|
||||||
stack_varying[i].store((varyingPos[i] + varyingStepPos[i] * iy) << 8);
|
varying[i] = (varyingPos[i] + varyingStepPos[i] * iy) << 8;
|
||||||
varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8;
|
varyingStep[i] = (varyingStartStepX[i] + varyingIncrStepX[i] * iy) << 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
stack_ix.store(SSAInt(0));
|
for (int ix = 0; ix < q; ix += 4)
|
||||||
SSAForBlock loopx;
|
|
||||||
SSAInt ix = stack_ix.load();
|
|
||||||
SSAInt varying[TriVertex::NumVarying];
|
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
|
||||||
varying[i] = stack_varying[i].load();
|
|
||||||
loopx.loop_block(ix < SSAInt(q), q);
|
|
||||||
{
|
{
|
||||||
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
SSAUBytePtr buf = buffer[ix * pixelsize];
|
||||||
|
if (truecolor)
|
||||||
{
|
{
|
||||||
SSAIfBlock branch;
|
SSAVec16ub pixels16 = buf.load_unaligned_vec16ub(false);
|
||||||
branch.if_block(subsectorbuffer[ix].load(true) >= subsectorDepth);
|
SSAVec8s pixels8hi = SSAVec8s::extendhi(pixels16);
|
||||||
|
SSAVec8s pixels8lo = SSAVec8s::extendlo(pixels16);
|
||||||
|
SSAVec4i pixels[4] =
|
||||||
{
|
{
|
||||||
ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying);
|
SSAVec4i::extendlo(pixels8lo),
|
||||||
|
SSAVec4i::extendhi(pixels8lo),
|
||||||
|
SSAVec4i::extendlo(pixels8hi),
|
||||||
|
SSAVec4i::extendhi(pixels8hi)
|
||||||
|
};
|
||||||
|
|
||||||
|
for (int sse = 0; sse < 4; sse++)
|
||||||
|
{
|
||||||
|
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||||
|
{
|
||||||
|
SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||||
|
pixels[sse] = subsectorTest.select(ProcessPixel32(pixels[sse], varying), pixels[sse]);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pixels[sse] = ProcessPixel32(pixels[sse], varying);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
varying[i] = varying[i] + varyingStep[i];
|
||||||
}
|
}
|
||||||
branch.end_block();
|
|
||||||
|
buf.store_unaligned_vec16ub(SSAVec16ub(SSAVec8s(pixels[0], pixels[1]), SSAVec8s(pixels[2], pixels[3])));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying);
|
SSAVec4i pixels = buf.load_vec4ub(false);
|
||||||
|
|
||||||
|
for (int sse = 0; sse < 4; sse++)
|
||||||
|
{
|
||||||
|
if (variant == TriDrawVariant::DrawSubsector || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||||
|
{
|
||||||
|
SSABool subsectorTest = subsectorbuffer[ix].load(true) >= subsectorDepth;
|
||||||
|
pixels.insert(sse, subsectorTest.select(ProcessPixel8(pixels[sse], varying), pixels[sse]));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pixels.insert(sse, ProcessPixel8(pixels[sse], varying));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
varying[i] = varying[i] + varyingStep[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
buf.store_vec4ub(pixels);
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
||||||
stack_varying[i].store(varying[i] + varyingStep[i]);
|
subsectorbuffer[ix].store_unaligned_vec4i(SSAVec4i(subsectorDepth));
|
||||||
|
|
||||||
stack_ix.store(ix + 1);
|
|
||||||
}
|
}
|
||||||
loopx.end_block();
|
|
||||||
|
|
||||||
stack_buffer.store(buffer[pitch * pixelsize]);
|
|
||||||
stack_subsectorbuffer.store(subsectorbuffer[pitch]);
|
|
||||||
stack_iy.store(iy + 1);
|
|
||||||
}
|
}
|
||||||
loopy.end_block();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
||||||
|
@ -425,7 +448,21 @@ void DrawTriangleCodegen::LoopPartialBlock()
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ProcessPixel(buffer[ix * pixelsize], subsectorbuffer[ix], varying);
|
SSAUBytePtr buf = buffer[ix * pixelsize];
|
||||||
|
|
||||||
|
if (truecolor)
|
||||||
|
{
|
||||||
|
SSAVec4i bg = buf.load_vec4ub(false);
|
||||||
|
buf.store_vec4ub(ProcessPixel32(bg, varying));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
SSAUByte bg = buf.load(false);
|
||||||
|
buf.store(ProcessPixel8(bg.zext_int(), varying).trunc_ubyte());
|
||||||
|
}
|
||||||
|
|
||||||
|
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
||||||
|
subsectorbuffer[ix].store(subsectorDepth);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
branch.end_block();
|
branch.end_block();
|
||||||
|
@ -466,7 +503,7 @@ SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset)
|
||||||
return texturePixels[uvoffset * 4].load_vec4ub(true);
|
return texturePixels[uvoffset * 4].load_vec4ub(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying)
|
SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying)
|
||||||
{
|
{
|
||||||
SSAInt ufrac = varying[0];
|
SSAInt ufrac = varying[0];
|
||||||
SSAInt vfrac = varying[1];
|
SSAInt vfrac = varying[1];
|
||||||
|
@ -475,80 +512,81 @@ void DrawTriangleCodegen::ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbu
|
||||||
SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
|
SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
|
||||||
SSAInt uvoffset = upos * textureHeight + vpos;
|
SSAInt uvoffset = upos * textureHeight + vpos;
|
||||||
|
|
||||||
if (truecolor)
|
SSAVec4i fg;
|
||||||
|
SSAInt alpha, inv_alpha;
|
||||||
|
SSAVec4i output;
|
||||||
|
|
||||||
|
switch (blendmode)
|
||||||
{
|
{
|
||||||
SSAVec4i fg;
|
default:
|
||||||
SSAVec4i bg = buffer.load_vec4ub(false);
|
case TriBlendMode::Copy:
|
||||||
SSAInt alpha, inv_alpha;
|
fg = Sample(uvoffset);
|
||||||
SSAVec4i output;
|
output = blend_copy(shade_bgra_simple(fg, currentlight)); break;
|
||||||
|
case TriBlendMode::AlphaBlend:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break;
|
||||||
|
case TriBlendMode::AddSolid:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break;
|
||||||
|
case TriBlendMode::Add:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
||||||
|
case TriBlendMode::Sub:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
||||||
|
case TriBlendMode::RevSub:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
||||||
|
case TriBlendMode::Shaded:
|
||||||
|
fg = Sample(uvoffset);
|
||||||
|
alpha = fg[0];
|
||||||
|
alpha = alpha + (alpha >> 7); // 255 -> 256
|
||||||
|
inv_alpha = 256 - alpha;
|
||||||
|
output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha);
|
||||||
|
break;
|
||||||
|
case TriBlendMode::TranslateCopy:
|
||||||
|
fg = TranslateSample(uvoffset);
|
||||||
|
output = blend_copy(shade_bgra_simple(fg, currentlight));
|
||||||
|
break;
|
||||||
|
case TriBlendMode::TranslateAlphaBlend:
|
||||||
|
fg = TranslateSample(uvoffset);
|
||||||
|
output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break;
|
||||||
|
break;
|
||||||
|
case TriBlendMode::TranslateAdd:
|
||||||
|
fg = TranslateSample(uvoffset);
|
||||||
|
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||||
|
break;
|
||||||
|
case TriBlendMode::TranslateSub:
|
||||||
|
fg = TranslateSample(uvoffset);
|
||||||
|
output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||||
|
break;
|
||||||
|
case TriBlendMode::TranslateRevSub:
|
||||||
|
fg = TranslateSample(uvoffset);
|
||||||
|
output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
switch (blendmode)
|
return output;
|
||||||
{
|
}
|
||||||
default:
|
|
||||||
case TriBlendMode::Copy:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_copy(shade_bgra_simple(fg, currentlight)); break;
|
|
||||||
case TriBlendMode::AlphaBlend:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break;
|
|
||||||
case TriBlendMode::AddSolid:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break;
|
|
||||||
case TriBlendMode::Add:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
|
||||||
case TriBlendMode::Sub:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
|
||||||
case TriBlendMode::RevSub:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
|
|
||||||
case TriBlendMode::Shaded:
|
|
||||||
fg = Sample(uvoffset);
|
|
||||||
alpha = fg[0];
|
|
||||||
alpha = alpha + (alpha >> 7); // 255 -> 256
|
|
||||||
inv_alpha = 256 - alpha;
|
|
||||||
output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha);
|
|
||||||
break;
|
|
||||||
case TriBlendMode::TranslateCopy:
|
|
||||||
fg = TranslateSample(uvoffset);
|
|
||||||
output = blend_copy(shade_bgra_simple(fg, currentlight));
|
|
||||||
break;
|
|
||||||
case TriBlendMode::TranslateAlphaBlend:
|
|
||||||
fg = TranslateSample(uvoffset);
|
|
||||||
output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break;
|
|
||||||
break;
|
|
||||||
case TriBlendMode::TranslateAdd:
|
|
||||||
fg = TranslateSample(uvoffset);
|
|
||||||
output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
|
||||||
break;
|
|
||||||
case TriBlendMode::TranslateSub:
|
|
||||||
fg = TranslateSample(uvoffset);
|
|
||||||
output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
|
||||||
break;
|
|
||||||
case TriBlendMode::TranslateRevSub:
|
|
||||||
fg = TranslateSample(uvoffset);
|
|
||||||
output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer.store_vec4ub(output);
|
SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying)
|
||||||
|
{
|
||||||
|
SSAInt ufrac = varying[0];
|
||||||
|
SSAInt vfrac = varying[1];
|
||||||
|
|
||||||
|
SSAInt upos = ((ufrac >> 16) * textureWidth) >> 16;
|
||||||
|
SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
|
||||||
|
SSAInt uvoffset = upos * textureHeight + vpos;
|
||||||
|
|
||||||
|
if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
||||||
|
{
|
||||||
|
return color;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
|
SSAUByte fg = texturePixels[uvoffset].load(true);
|
||||||
{
|
return fg.zext_int();
|
||||||
buffer.store(color.trunc_ubyte());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SSAUByte fg = texturePixels[uvoffset].load(true);
|
|
||||||
buffer.store(fg);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (variant != TriDrawVariant::DrawSubsector && variant != TriDrawVariant::FillSubsector && variant != TriDrawVariant::FuzzSubsector)
|
|
||||||
subsectorbuffer.store(subsectorDepth);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
|
void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
|
||||||
|
|
|
@ -46,7 +46,8 @@ private:
|
||||||
void LoopFullBlock();
|
void LoopFullBlock();
|
||||||
void LoopPartialBlock();
|
void LoopPartialBlock();
|
||||||
|
|
||||||
void ProcessPixel(SSAUBytePtr buffer, SSAIntPtr subsectorbuffer, SSAInt *varying);
|
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
||||||
|
SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying);
|
||||||
|
|
||||||
SSAVec4i TranslateSample(SSAInt uvoffset);
|
SSAVec4i TranslateSample(SSAInt uvoffset);
|
||||||
SSAVec4i Sample(SSAInt uvoffset);
|
SSAVec4i Sample(SSAInt uvoffset);
|
||||||
|
|
|
@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance()
|
||||||
|
|
||||||
LLVMDrawersImpl::LLVMDrawersImpl()
|
LLVMDrawersImpl::LLVMDrawersImpl()
|
||||||
{
|
{
|
||||||
int version = 1; // Increment this number if the drawer codegen is modified (forces recreation of the module).
|
int version = 2; // Increment this number if the drawer codegen is modified (forces recreation of the module).
|
||||||
std::string targetCPU = mProgram.GetTargetCPU();
|
std::string targetCPU = mProgram.GetTargetCPU();
|
||||||
bool loaded = mProgram.LoadCachedModule(version, targetCPU);
|
bool loaded = mProgram.LoadCachedModule(version, targetCPU);
|
||||||
if (!loaded)
|
if (!loaded)
|
||||||
|
|
|
@ -23,6 +23,7 @@
|
||||||
#include "r_compiler/llvm_include.h"
|
#include "r_compiler/llvm_include.h"
|
||||||
#include "ssa_bool.h"
|
#include "ssa_bool.h"
|
||||||
#include "ssa_ubyte.h"
|
#include "ssa_ubyte.h"
|
||||||
|
#include "ssa_vec4i.h"
|
||||||
#include "ssa_value.h"
|
#include "ssa_value.h"
|
||||||
#include "ssa_scope.h"
|
#include "ssa_scope.h"
|
||||||
|
|
||||||
|
@ -61,6 +62,11 @@ SSAUByte SSABool::select(SSAUByte a, SSAUByte b)
|
||||||
return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint()));
|
return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint()));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SSAVec4i SSABool::select(SSAVec4i a, SSAVec4i b)
|
||||||
|
{
|
||||||
|
return SSAValue::from_llvm(SSAScope::builder().CreateSelect(v, a.v, b.v, SSAScope::hint()));
|
||||||
|
}
|
||||||
|
|
||||||
SSABool operator&&(const SSABool &a, const SSABool &b)
|
SSABool operator&&(const SSABool &a, const SSABool &b)
|
||||||
{
|
{
|
||||||
return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint()));
|
return SSABool::from_llvm(SSAScope::builder().CreateAnd(a.v, b.v, SSAScope::hint()));
|
||||||
|
|
|
@ -29,6 +29,8 @@
|
||||||
namespace llvm { class Value; }
|
namespace llvm { class Value; }
|
||||||
namespace llvm { class Type; }
|
namespace llvm { class Type; }
|
||||||
|
|
||||||
|
class SSAVec4i;
|
||||||
|
|
||||||
class SSABool
|
class SSABool
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
@ -41,6 +43,7 @@ public:
|
||||||
SSAInt zext_int();
|
SSAInt zext_int();
|
||||||
SSAInt select(SSAInt a, SSAInt b);
|
SSAInt select(SSAInt a, SSAInt b);
|
||||||
SSAUByte select(SSAUByte a, SSAUByte b);
|
SSAUByte select(SSAUByte a, SSAUByte b);
|
||||||
|
SSAVec4i select(SSAVec4i a, SSAVec4i b);
|
||||||
|
|
||||||
llvm::Value *v;
|
llvm::Value *v;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue