mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-28 06:42:09 +00:00
Remove some multiplications and branching from the triangle drawer
This commit is contained in:
parent
046f5f2b2e
commit
b450ac5047
2 changed files with 90 additions and 61 deletions
|
@ -134,12 +134,20 @@ void DrawTriangleCodegen::Setup()
|
||||||
v3.y = SSAFloat(Y3) * 0.0625f;
|
v3.y = SSAFloat(Y3) * 0.0625f;
|
||||||
gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
gradWX = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||||
gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
gradWY = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||||
startW = v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y);
|
stack_posy_w.store(v1.w + gradWX * (SSAFloat(minx) - v1.x) + gradWY * (SSAFloat(miny) - v1.y));
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
{
|
{
|
||||||
gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
|
gradVaryingX[i] = gradx(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
|
||||||
gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
|
gradVaryingY[i] = grady(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.varying[i] * v1.w, v2.varying[i] * v2.w, v3.varying[i] * v3.w);
|
||||||
startVarying[i] = v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y);
|
stack_posy_varying[i].store(v1.varying[i] * v1.w + gradVaryingX[i] * (SSAFloat(minx) - v1.x) + gradVaryingY[i] * (SSAFloat(miny) - v1.y));
|
||||||
|
}
|
||||||
|
|
||||||
|
gradWX = gradWX * (float)q;
|
||||||
|
gradWY = gradWY * (float)q;
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
{
|
||||||
|
gradVaryingX[i] = gradVaryingX[i] * (float)q;
|
||||||
|
gradVaryingY[i] = gradVaryingY[i] * (float)q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -161,26 +169,32 @@ void DrawTriangleCodegen::LoopBlockY()
|
||||||
{
|
{
|
||||||
int pixelsize = truecolor ? 4 : 1;
|
int pixelsize = truecolor ? 4 : 1;
|
||||||
|
|
||||||
stack_y.store(miny);
|
SSAInt blocks_skipped = skipped_by_thread(miny / q, thread);
|
||||||
stack_dest.store(dest);
|
stack_y.store(miny + blocks_skipped * q);
|
||||||
stack_subsectorGBuffer.store(subsectorGBuffer);
|
stack_dest.store(dest[blocks_skipped * q * pitch * pixelsize]);
|
||||||
|
stack_subsectorGBuffer.store(subsectorGBuffer[blocks_skipped * q * pitch]);
|
||||||
|
stack_posy_w.store(stack_posy_w.load() + gradWY * blocks_skipped);
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
stack_posy_varying[i].store(stack_posy_varying[i].load() + gradVaryingY[i] * blocks_skipped);
|
||||||
|
|
||||||
SSAForBlock loop;
|
SSAForBlock loop;
|
||||||
y = stack_y.load();
|
y = stack_y.load();
|
||||||
dest = stack_dest.load();
|
dest = stack_dest.load();
|
||||||
subsectorGBuffer = stack_subsectorGBuffer.load();
|
subsectorGBuffer = stack_subsectorGBuffer.load();
|
||||||
|
posy_w = stack_posy_w.load();
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
posy_varying[i] = stack_posy_varying[i].load();
|
||||||
loop.loop_block(y < maxy, 0);
|
loop.loop_block(y < maxy, 0);
|
||||||
{
|
|
||||||
SSAIfBlock branch;
|
|
||||||
branch.if_block((y / q) % thread.num_cores == thread.core);
|
|
||||||
{
|
{
|
||||||
LoopBlockX();
|
LoopBlockX();
|
||||||
}
|
|
||||||
branch.end_block();
|
|
||||||
|
|
||||||
stack_dest.store(dest[q * pitch * pixelsize]);
|
stack_posy_w.store(posy_w + gradWY * thread.num_cores);
|
||||||
stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch]);
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
stack_y.store(y + q);
|
stack_posy_varying[i].store(posy_varying[i] + gradVaryingY[i] * thread.num_cores);
|
||||||
|
|
||||||
|
stack_dest.store(dest[q * pitch * pixelsize * thread.num_cores]);
|
||||||
|
stack_subsectorGBuffer.store(subsectorGBuffer[q * pitch * thread.num_cores]);
|
||||||
|
stack_y.store(y + thread.num_cores * q);
|
||||||
}
|
}
|
||||||
loop.end_block();
|
loop.end_block();
|
||||||
}
|
}
|
||||||
|
@ -188,9 +202,15 @@ void DrawTriangleCodegen::LoopBlockY()
|
||||||
void DrawTriangleCodegen::LoopBlockX()
|
void DrawTriangleCodegen::LoopBlockX()
|
||||||
{
|
{
|
||||||
stack_x.store(minx);
|
stack_x.store(minx);
|
||||||
|
stack_posx_w.store(posy_w);
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
stack_posx_varying[i].store(stack_posy_varying[i].load());
|
||||||
|
|
||||||
SSAForBlock loop;
|
SSAForBlock loop;
|
||||||
x = stack_x.load();
|
x = stack_x.load();
|
||||||
|
posx_w = stack_posx_w.load();
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
posx_varying[i] = stack_posx_varying[i].load();
|
||||||
loop.loop_block(x < maxx, 0);
|
loop.loop_block(x < maxx, 0);
|
||||||
{
|
{
|
||||||
// Corners of block
|
// Corners of block
|
||||||
|
@ -226,51 +246,7 @@ void DrawTriangleCodegen::LoopBlockX()
|
||||||
// Check if block needs clipping
|
// Check if block needs clipping
|
||||||
SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom;
|
SSABool clipneeded = x < clipleft || (x + q) > clipright || y < cliptop || (y + q) > clipbottom;
|
||||||
|
|
||||||
// Calculate varying variables for affine block
|
SetupAffineBlock();
|
||||||
SSAFloat offx0 = SSAFloat(x - minx);
|
|
||||||
SSAFloat offy0 = SSAFloat(y - miny);
|
|
||||||
SSAFloat offx1 = offx0 + SSAFloat(q);
|
|
||||||
SSAFloat offy1 = offy0 + SSAFloat(q);
|
|
||||||
SSAFloat rcpWTL = 1.0f / (startW + offx0 * gradWX + offy0 * gradWY);
|
|
||||||
SSAFloat rcpWTR = 1.0f / (startW + offx1 * gradWX + offy0 * gradWY);
|
|
||||||
SSAFloat rcpWBL = 1.0f / (startW + offx0 * gradWX + offy1 * gradWY);
|
|
||||||
SSAFloat rcpWBR = 1.0f / (startW + offx1 * gradWX + offy1 * gradWY);
|
|
||||||
for (int i = 0; i < TriVertex::NumVarying; i++)
|
|
||||||
{
|
|
||||||
SSAFloat varyingTL = (startVarying[i] + offx0 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTL;
|
|
||||||
SSAFloat varyingTR = (startVarying[i] + offx1 * gradVaryingX[i] + offy0 * gradVaryingY[i]) * rcpWTR;
|
|
||||||
SSAFloat varyingBL = (startVarying[i] + offx0 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBL;
|
|
||||||
SSAFloat varyingBR = (startVarying[i] + offx1 * gradVaryingX[i] + offy1 * gradVaryingY[i]) * rcpWBR;
|
|
||||||
|
|
||||||
SSAFloat startStepX = (varyingTR - varyingTL) * (1.0f / q);
|
|
||||||
SSAFloat endStepX = (varyingBR - varyingBL) * (1.0f / q);
|
|
||||||
SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q);
|
|
||||||
SSAFloat stepY = (varyingBL - varyingTL) * (1.0f / q);
|
|
||||||
|
|
||||||
varyingPos[i] = SSAInt(varyingTL * SSAFloat((float)0x01000000), false) << 8;
|
|
||||||
varyingStepY[i] = SSAInt(stepY * SSAFloat((float)0x01000000), false) << 8;
|
|
||||||
varyingStartStepX[i] = SSAInt(startStepX * SSAFloat((float)0x01000000), false) << 8;
|
|
||||||
varyingIncrStepX[i] = SSAInt(incrStepX * SSAFloat((float)0x01000000), false) << 8;
|
|
||||||
}
|
|
||||||
|
|
||||||
SSAFloat globVis = SSAFloat(1706.0f);
|
|
||||||
SSAFloat vis = globVis / rcpWTL;
|
|
||||||
SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f;
|
|
||||||
SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f));
|
|
||||||
SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false);
|
|
||||||
|
|
||||||
if (!truecolor)
|
|
||||||
{
|
|
||||||
SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false);
|
|
||||||
SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31));
|
|
||||||
SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex);
|
|
||||||
currentcolormap = Colormaps[colormapindex << 8];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
currentlight = is_fixed_light.select(light, diminishedlight);
|
|
||||||
}
|
|
||||||
|
|
||||||
SetStencilBlock(x / 8 + y / 8 * stencilPitch);
|
SetStencilBlock(x / 8 + y / 8 * stencilPitch);
|
||||||
|
|
||||||
SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue();
|
SSABool covered = a == SSAInt(0xF) && b == SSAInt(0xF) && c == SSAInt(0xF) && !clipneeded && StencilIsSingleValue();
|
||||||
|
@ -289,11 +265,55 @@ void DrawTriangleCodegen::LoopBlockX()
|
||||||
|
|
||||||
branch.end_block();
|
branch.end_block();
|
||||||
|
|
||||||
|
stack_posx_w.store(posx_w + gradWX);
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
stack_posx_varying[i].store(posx_varying[i] + gradVaryingX[i]);
|
||||||
|
|
||||||
stack_x.store(x + q);
|
stack_x.store(x + q);
|
||||||
}
|
}
|
||||||
loop.end_block();
|
loop.end_block();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DrawTriangleCodegen::SetupAffineBlock()
|
||||||
|
{
|
||||||
|
// Calculate varying variables for affine block
|
||||||
|
SSAVec4f rcpW = SSAVec4f::rcp(SSAVec4f(posx_w, posx_w + gradWX, posx_w + gradWY, posx_w + gradWX + gradWY));
|
||||||
|
for (int i = 0; i < TriVertex::NumVarying; i++)
|
||||||
|
{
|
||||||
|
// Top left, top right, bottom left, bottom right:
|
||||||
|
SSAVec4f varying = SSAVec4f(posx_varying[i], posx_varying[i] + gradVaryingX[i], posx_varying[i] + gradVaryingY[i], posx_varying[i] + gradVaryingX[i] + gradVaryingY[i]) * rcpW;
|
||||||
|
|
||||||
|
SSAFloat startStepX = (varying[1] - varying[0]) * (1.0f / q);
|
||||||
|
SSAFloat endStepX = (varying[3] - varying[2]) * (1.0f / q);
|
||||||
|
SSAFloat incrStepX = (endStepX - startStepX) * (1.0f / q);
|
||||||
|
SSAFloat stepY = (varying[2] - varying[0]) * (1.0f / q);
|
||||||
|
|
||||||
|
SSAVec4i ints = SSAVec4i(SSAVec4f(varying[0], stepY, startStepX, incrStepX) * (float)0x01000000) << 8;
|
||||||
|
varyingPos[i] = ints[0];
|
||||||
|
varyingStepY[i] = ints[1];
|
||||||
|
varyingStartStepX[i] = ints[2];
|
||||||
|
varyingIncrStepX[i] = ints[3];
|
||||||
|
}
|
||||||
|
|
||||||
|
SSAFloat globVis = SSAFloat(1706.0f);
|
||||||
|
SSAFloat vis = globVis / rcpW[0];
|
||||||
|
SSAFloat shade = 64.0f - (SSAFloat(light * 255 / 256) + 12.0f) * 32.0f / 128.0f;
|
||||||
|
SSAFloat lightscale = SSAFloat::clamp((shade - SSAFloat::MIN(SSAFloat(24.0f), vis)) / 32.0f, SSAFloat(0.0f), SSAFloat(31.0f / 32.0f));
|
||||||
|
SSAInt diminishedlight = SSAInt(SSAFloat::clamp((1.0f - lightscale) * 256.0f + 0.5f, SSAFloat(0.0f), SSAFloat(256.0f)), false);
|
||||||
|
|
||||||
|
if (!truecolor)
|
||||||
|
{
|
||||||
|
SSAInt diminishedindex = SSAInt(lightscale * 32.0f, false);
|
||||||
|
SSAInt lightindex = SSAInt::MIN((256 - light) * 32 / 256, SSAInt(31));
|
||||||
|
SSAInt colormapindex = is_fixed_light.select(lightindex, diminishedindex);
|
||||||
|
currentcolormap = Colormaps[colormapindex << 8];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
currentlight = is_fixed_light.select(light, diminishedlight);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void DrawTriangleCodegen::LoopFullBlock()
|
void DrawTriangleCodegen::LoopFullBlock()
|
||||||
{
|
{
|
||||||
SSAIfBlock branch_stenciltest;
|
SSAIfBlock branch_stenciltest;
|
||||||
|
@ -835,6 +855,8 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data)
|
||||||
|
|
||||||
thread.core = thread_data[0][0].load(true);
|
thread.core = thread_data[0][0].load(true);
|
||||||
thread.num_cores = thread_data[0][1].load(true);
|
thread.num_cores = thread_data[0][1].load(true);
|
||||||
|
thread.pass_start_y = SSAInt(0);
|
||||||
|
thread.pass_end_y = SSAInt(32000);
|
||||||
}
|
}
|
||||||
|
|
||||||
SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr)
|
SSATriVertex DrawTriangleCodegen::LoadTriVertex(SSAValue ptr)
|
||||||
|
|
|
@ -45,6 +45,7 @@ private:
|
||||||
void LoopBlockX();
|
void LoopBlockX();
|
||||||
void LoopFullBlock();
|
void LoopFullBlock();
|
||||||
void LoopPartialBlock();
|
void LoopPartialBlock();
|
||||||
|
void SetupAffineBlock();
|
||||||
|
|
||||||
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
|
||||||
SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying);
|
SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying);
|
||||||
|
@ -85,6 +86,9 @@ private:
|
||||||
SSAStack<SSAInt> stack_CY1, stack_CY2, stack_CY3;
|
SSAStack<SSAInt> stack_CY1, stack_CY2, stack_CY3;
|
||||||
SSAStack<SSAInt> stack_CX1, stack_CX2, stack_CX3;
|
SSAStack<SSAInt> stack_CX1, stack_CX2, stack_CX3;
|
||||||
|
|
||||||
|
SSAStack<SSAFloat> stack_posy_w, stack_posy_varying[TriVertex::NumVarying];
|
||||||
|
SSAStack<SSAFloat> stack_posx_w, stack_posx_varying[TriVertex::NumVarying];
|
||||||
|
|
||||||
SSAUBytePtr dest;
|
SSAUBytePtr dest;
|
||||||
SSAInt pitch;
|
SSAInt pitch;
|
||||||
SSATriVertex v1;
|
SSATriVertex v1;
|
||||||
|
@ -131,8 +135,8 @@ private:
|
||||||
SSAInt FDY12, FDY23, FDY31;
|
SSAInt FDY12, FDY23, FDY31;
|
||||||
SSAInt minx, maxx, miny, maxy;
|
SSAInt minx, maxx, miny, maxy;
|
||||||
SSAInt C1, C2, C3;
|
SSAInt C1, C2, C3;
|
||||||
SSAFloat gradWX, gradWY, startW;
|
SSAFloat gradWX, gradWY;
|
||||||
SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying], startVarying[TriVertex::NumVarying];
|
SSAFloat gradVaryingX[TriVertex::NumVarying], gradVaryingY[TriVertex::NumVarying];
|
||||||
|
|
||||||
SSAInt x, y;
|
SSAInt x, y;
|
||||||
SSAInt x0, x1, y0, y1;
|
SSAInt x0, x1, y0, y1;
|
||||||
|
@ -143,6 +147,9 @@ private:
|
||||||
SSAInt varyingStartStepX[TriVertex::NumVarying];
|
SSAInt varyingStartStepX[TriVertex::NumVarying];
|
||||||
SSAInt varyingIncrStepX[TriVertex::NumVarying];
|
SSAInt varyingIncrStepX[TriVertex::NumVarying];
|
||||||
|
|
||||||
|
SSAFloat posy_w, posy_varying[TriVertex::NumVarying];
|
||||||
|
SSAFloat posx_w, posx_varying[TriVertex::NumVarying];
|
||||||
|
|
||||||
SSAUBytePtr StencilBlock;
|
SSAUBytePtr StencilBlock;
|
||||||
SSAIntPtr StencilBlockMask;
|
SSAIntPtr StencilBlockMask;
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue