- rewrite screen triangle drawer to be more modular and better support the hwrenderer shaders

This commit is contained in:
Magnus Norddahl 2019-08-05 03:23:04 +02:00
parent ebe377e5de
commit f73470dcf4
4 changed files with 389 additions and 10 deletions

View file

@ -909,6 +909,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *const* ve
}
#endif
#if 0
// Keep varyings in -128 to 128 range if possible
// But don't do this for the skycap mode since the V texture coordinate is used for blending
if (numclipvert > 0 && drawargs.BlendMode() != TriBlendMode::Skycap)
@ -921,6 +922,7 @@ void PolyTriangleThreadData::DrawShadedTriangle(const ShadedTriVertex *const* ve
clippedvert[i].v -= newOriginV;
}
}
#endif
if (twosided && numclipvert > 2)
{

View file

@ -198,16 +198,20 @@ public:
return MAX(c, 0);
}
// Varyings
float worldposX[MAXWIDTH];
float worldposY[MAXWIDTH];
float worldposZ[MAXWIDTH];
uint32_t texel[MAXWIDTH];
int32_t texelV[MAXWIDTH];
uint16_t lightarray[MAXWIDTH];
uint32_t dynlights[MAXWIDTH];
float depthvalues[MAXWIDTH];
uint8_t alphatestbuffer[MAXWIDTH];
// Per-thread scratch buffers holding one scanline of interpolated values.
// Entries are indexed by the same x coordinate the span functions use for the
// destination row (WriteW/WriteVarying store to index x, WriteBlend reads index x).
struct Scanline
{
// Written by WriteW as 1.0f / (interpolated vertex w term); also the value WriteDepth stores.
float W[MAXWIDTH];
// Texture coordinates, written by WriteVaryings and sampled by RunShader.
float U[MAXWIDTH];
float V[MAXWIDTH];
// Interpolated world position components, written by WriteVaryings.
float WorldX[MAXWIDTH];
float WorldY[MAXWIDTH];
float WorldZ[MAXWIDTH];
// Shader output per pixel; consumed by WriteBlend and by the alpha-test checks.
uint32_t FragColor[MAXWIDTH];
#if 0
// Disabled: not compiled while this #if 0 is in place.
uint16_t lightarray[MAXWIDTH];
uint32_t dynlights[MAXWIDTH];
#endif
} scanline;
static PolyTriangleThreadData *Get(DrawerThread *thread);

View file

@ -38,6 +38,350 @@
#include "screen_triangle.h"
#include "x86.h"
// Fills thread->scanline.W for pixels [x0, x1) of row y.
//
// The vertex w term is interpolated linearly from args->v1 using the X/Y
// gradients, sampled at pixel centres (hence the 0.5 offsets), and its
// reciprocal is what gets stored.
static void WriteW(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
	// Distance from vertex v1 to the centre of the first pixel of the span.
	const float offsetX = x0 + (0.5f - args->v1->x);
	const float offsetY = y + (0.5f - args->v1->y);

	float curW = args->v1->w + args->gradientX.W * offsetX + args->gradientY.W * offsetY;
	const float deltaW = args->gradientX.W;

	float* out = thread->scanline.W;
	int x = x0;
	while (x < x1)
	{
		out[x] = 1.0f / curW;
		curW += deltaW;
		x++;
	}
}
// Writes one interpolated varying for pixels [x0, x1).
//
// pos     - value of the linearly stepped term at pixel x0
// step    - amount added to that term per pixel
// w       - per-pixel multiplier, indexed by x
// varying - output array, indexed by x; entries outside [x0, x1) are untouched
static void WriteVarying(float pos, float step, int x0, int x1, const float* w, float* varying)
{
	float value = pos;
	int x = x0;
	while (x < x1)
	{
		varying[x] = value * w[x];
		// Accumulate step by step (rather than pos + i * step) to keep the exact rounding sequence.
		value += step;
		++x;
	}
}
// Interpolates all per-pixel varyings (U, V, WorldX, WorldY, WorldZ) for
// pixels [x0, x1) of row y and stores them in thread->scanline.
//
// Each varying starts from the v1 attribute multiplied by v1->w, is stepped
// with the matching gradient, and is multiplied by thread->scanline.W[x] in
// WriteVarying. scanline.W must therefore already hold this span's values.
static void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
	const auto* origin = args->v1;
	const auto& gradX = args->gradientX;
	const auto& gradY = args->gradientY;

	// Distance from vertex v1 to the centre of the first pixel of the span.
	const float offsetX = x0 + (0.5f - origin->x);
	const float offsetY = y + (0.5f - origin->y);

	const float* w = thread->scanline.W;

	WriteVarying(origin->u * origin->w + gradX.U * offsetX + gradY.U * offsetY,
		gradX.U, x0, x1, w, thread->scanline.U);
	WriteVarying(origin->v * origin->w + gradX.V * offsetX + gradY.V * offsetY,
		gradX.V, x0, x1, w, thread->scanline.V);
	WriteVarying(origin->worldX * origin->w + gradX.WorldX * offsetX + gradY.WorldX * offsetY,
		gradX.WorldX, x0, x1, w, thread->scanline.WorldX);
	WriteVarying(origin->worldY * origin->w + gradX.WorldY * offsetX + gradY.WorldY * offsetY,
		gradX.WorldY, x0, x1, w, thread->scanline.WorldY);
	WriteVarying(origin->worldZ * origin->w + gradX.WorldZ * offsetX + gradY.WorldZ * offsetY,
		gradX.WorldZ, x0, x1, w, thread->scanline.WorldZ);
}
// Copies the shaded colors of pixels [x0, x1) from thread->scanline.FragColor
// into row y of the destination buffer (treated as 32-bit pixels).
//
// When thread->AlphaTest is set, only pixels whose FragColor value is greater
// than 0x7f000000 are written; the others keep their previous destination value.
static void WriteBlend(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* base = (uint32_t*)thread->dest;
	uint32_t* row = base + y * (ptrdiff_t)thread->dest_pitch;
	const uint32_t* src = thread->scanline.FragColor;

	if (thread->AlphaTest)
	{
		for (int x = x0; x < x1; x++)
		{
			if (src[x] > 0x7f000000)
				row[x] = src[x];
		}
		return;
	}

	for (int x = x0; x < x1; x++)
	{
		row[x] = src[x];
	}
}
// Stores thread->scanline.W for pixels [x0, x1) into row y of the depth buffer.
//
// When thread->AlphaTest is set, a pixel's depth is only written if its
// FragColor value is greater than 0x7f000000 (same condition as WriteBlend).
static void WriteDepth(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	const size_t pitch = thread->depthstencil->Width();
	float* row = thread->depthstencil->DepthValues() + pitch * y;
	const float* src = thread->scanline.W;

	if (thread->AlphaTest)
	{
		const uint32_t* color = thread->scanline.FragColor;
		for (int x = x0; x < x1; x++)
		{
			if (color[x] > 0x7f000000)
				row[x] = src[x];
		}
		return;
	}

	for (int x = x0; x < x1; x++)
	{
		row[x] = src[x];
	}
}
// Writes drawargs.StencilWriteValue() into row y of the stencil buffer for
// pixels [x0, x1).
//
// When thread->AlphaTest is set, a pixel's stencil is only written if its
// FragColor value is greater than 0x7f000000 (same condition as WriteBlend).
static void WriteStencil(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
	const size_t pitch = thread->depthstencil->Width();
	uint8_t* row = thread->depthstencil->StencilValues() + pitch * y;
	const uint8_t writeValue = thread->drawargs.StencilWriteValue();

	if (thread->AlphaTest)
	{
		const uint32_t* color = thread->scanline.FragColor;
		for (int x = x0; x < x1; x++)
		{
			if (color[x] > 0x7f000000)
				row[x] = writeValue;
		}
		return;
	}

	for (int x = x0; x < x1; x++)
	{
		row[x] = writeValue;
	}
}
// Computes thread->scanline.FragColor for pixels [x0, x1) by sampling the
// bound texture at the interpolated (U, V) coordinates.
//
// Coordinates wrap: only the fractional part of U and V is used. Texels are
// addressed as texelX * texHeight + texelY.
// NOTE(review): assumes TextureWidth()/TextureHeight() are positive and
// TexturePixels() is non-null - confirm against the callers.
static void RunShader(int x0, int x1, PolyTriangleThreadData* thread)
{
	int texWidth = thread->drawargs.TextureWidth();
	int texHeight = thread->drawargs.TextureHeight();
	const uint32_t* texPixels = (const uint32_t*)thread->drawargs.TexturePixels();

	const float* coordU = thread->scanline.U;
	const float* coordV = thread->scanline.V;
	uint32_t* fragcolor = thread->scanline.FragColor;

	for (int x = x0; x < x1; x++)
	{
		float u = coordU[x];
		float v = coordV[x];
		u -= std::floor(u);
		v -= std::floor(v);

		int texelX = (int)(u * texWidth);
		int texelY = (int)(v * texHeight);

		// For a tiny negative coordinate (e.g. -1e-9f) the subtraction above rounds
		// to exactly 1.0f, which would index one column/row past the texture.
		if (texelX >= texWidth)
			texelX = texWidth - 1;
		if (texelY >= texHeight)
			texelY = texHeight - 1;

		fragcolor[x] = texPixels[texelX * texHeight + texelY];
	}
}
// Shades pixels [x0, x1) of row y and writes the enabled outputs.
//
// Nothing is drawn for the Fill and FillTranslucent blend modes. Otherwise the
// varyings are interpolated, the shader fills scanline.FragColor, and color,
// depth and stencil are written according to the WriteColor/WriteDepth/
// WriteStencil draw arguments. scanline.W must already be filled for the span.
static void DrawSpan(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
	auto& drawargs = thread->drawargs;

	const bool isFillMode =
		drawargs.BlendMode() == TriBlendMode::Fill ||
		drawargs.BlendMode() == TriBlendMode::FillTranslucent;
	if (isFillMode)
		return;

	WriteVaryings(y, x0, x1, args, thread);
	RunShader(x0, x1, thread);

	if (drawargs.WriteColor())
	{
		WriteBlend(y, x0, x1, thread);
	}
	if (drawargs.WriteDepth())
	{
		WriteDepth(y, x0, x1, thread);
	}
	if (drawargs.WriteStencil())
	{
		WriteStencil(y, x0, x1, thread);
	}
}
// Runs the depth and/or stencil test over pixels [x0, x1) of row y and calls
// DrawSpan for every maximal run of consecutive pixels that pass.
//
// OptT::Flags is a bitmask of TriScreenDrawerModes::SWTRI_DepthTest and
// SWTRI_StencilTest selecting which tests are compiled in:
//  - depth test:   passes when depthbuffer[x] >= scanline.W[x] + thread->depthbias
//  - stencil test: passes when stencilbuffer[x] == drawargs.StencilTestValue()
// With no flags set the whole span is drawn unconditionally.
//
// scanline.W is (re)computed for the span first, since both the depth test and
// DrawSpan rely on it.
template<typename OptT>
static void TestSpan(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
	using namespace TriScreenDrawerModes;

	WriteW(y, x0, x1, args, thread);

	// Compile-time constants per instantiation; the optimizer removes the unused test.
	const bool depthTest = (OptT::Flags & SWTRI_DepthTest) != 0;
	const bool stencilTest = (OptT::Flags & SWTRI_StencilTest) != 0;

	if (!depthTest && !stencilTest)
	{
		DrawSpan(y, x0, x1, args, thread);
		return;
	}

	size_t pitch = thread->depthstencil->Width();

	const uint8_t* stencilLine = nullptr;
	uint8_t stencilTestValue = 0;
	if (stencilTest)
	{
		stencilLine = thread->depthstencil->StencilValues() + pitch * y;
		stencilTestValue = thread->drawargs.StencilTestValue();
	}

	const float* zbufferLine = nullptr;
	const float* w = nullptr;
	float depthbias = 0.0f;
	if (depthTest)
	{
		zbufferLine = thread->depthstencil->DepthValues() + pitch * y;
		w = thread->scanline.W;
		depthbias = thread->depthbias;
	}

	// Single source of truth for "does pixel px pass?". The skip loop below uses
	// its exact negation, so every pixel is consumed by one of the two inner
	// loops and the outer loop always makes progress (even for NaN depth values).
	auto passes = [&](int px) -> bool
	{
		if (depthTest && !(zbufferLine[px] >= w[px] + depthbias))
			return false;
		if (stencilTest && stencilLine[px] != stencilTestValue)
			return false;
		return true;
	};

	int x = x0;
	const int xend = x1;
	while (x < xend)
	{
		// Collect a run of passing pixels. The bounds check comes first so that
		// the buffers are never read at index xend.
		const int xstart = x;
		while (x < xend && passes(x))
			x++;

		if (x > xstart)
		{
			DrawSpan(y, xstart, x, args, thread);
		}

		// Skip the following run of failing pixels.
		while (x < xend && !passes(x))
			x++;
	}
}
static void SortVertices(const TriDrawTriangleArgs* args, ScreenTriVertex** sortedVertices)
{
sortedVertices[0] = args->v1;
sortedVertices[1] = args->v2;
sortedVertices[2] = args->v3;
if (sortedVertices[1]->y < sortedVertices[0]->y)
std::swap(sortedVertices[0], sortedVertices[1]);
if (sortedVertices[2]->y < sortedVertices[0]->y)
std::swap(sortedVertices[0], sortedVertices[2]);
if (sortedVertices[2]->y < sortedVertices[1]->y)
std::swap(sortedVertices[1], sortedVertices[2]);
}
void ScreenTriangle::Draw(const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
// Sort vertices by Y position
ScreenTriVertex* sortedVertices[3];
SortVertices(args, sortedVertices);
int clipleft = thread->clip.left;
int cliptop = MAX(thread->clip.top, thread->numa_start_y);
int clipright = thread->clip.right;
int clipbottom = MIN(thread->clip.bottom, thread->numa_end_y);
int topY = (int)(sortedVertices[0]->y + 0.5f);
int midY = (int)(sortedVertices[1]->y + 0.5f);
int bottomY = (int)(sortedVertices[2]->y + 0.5f);
topY = MAX(topY, cliptop);
midY = MIN(midY, clipbottom);
bottomY = MIN(bottomY, clipbottom);
if (topY >= bottomY)
return;
void(*testfunc)(int y, int x0, int x1, const TriDrawTriangleArgs * args, PolyTriangleThreadData * thread);
int opt = 0;
if (thread->drawargs.DepthTest()) opt |= TriScreenDrawerModes::SWTRI_DepthTest;
if (thread->drawargs.StencilTest()) opt |= TriScreenDrawerModes::SWTRI_StencilTest;
testfunc = ScreenTriangle::TestSpanOpts[opt];
topY += thread->skipped_by_thread(topY);
int num_cores = thread->num_cores;
// Find start/end X positions for each line covered by the triangle:
int y = topY;
float longDX = sortedVertices[2]->x - sortedVertices[0]->x;
float longDY = sortedVertices[2]->y - sortedVertices[0]->y;
float longStep = longDX / longDY;
float longPos = sortedVertices[0]->x + longStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
longStep *= num_cores;
if (y < midY)
{
float shortDX = sortedVertices[1]->x - sortedVertices[0]->x;
float shortDY = sortedVertices[1]->y - sortedVertices[0]->y;
float shortStep = shortDX / shortDY;
float shortPos = sortedVertices[0]->x + shortStep * (y + 0.5f - sortedVertices[0]->y) + 0.5f;
shortStep *= num_cores;
while (y < midY)
{
int x0 = (int)shortPos;
int x1 = (int)longPos;
if (x1 < x0) std::swap(x0, x1);
x0 = clamp(x0, clipleft, clipright);
x1 = clamp(x1, clipleft, clipright);
testfunc(y, x0, x1, args, thread);
shortPos += shortStep;
longPos += longStep;
y += num_cores;
}
}
if (y < bottomY)
{
float shortDX = sortedVertices[2]->x - sortedVertices[1]->x;
float shortDY = sortedVertices[2]->y - sortedVertices[1]->y;
float shortStep = shortDX / shortDY;
float shortPos = sortedVertices[1]->x + shortStep * (y + 0.5f - sortedVertices[1]->y) + 0.5f;
shortStep *= num_cores;
while (y < bottomY)
{
int x0 = (int)shortPos;
int x1 = (int)longPos;
if (x1 < x0) std::swap(x0, x1);
x0 = clamp(x0, clipleft, clipright);
x1 = clamp(x1, clipleft, clipright);
testfunc(y, x0, x1, args, thread);
shortPos += shortStep;
longPos += longStep;
y += num_cores;
}
}
}
// Span test functions, indexed by a bitmask of TriScreenDrawerModes::SWTestSpan
// flags (ScreenTriangle::Draw builds the index from the DepthTest/StencilTest
// draw arguments). Entry N is TestSpan instantiated with TestSpanOptN, whose
// Flags value equals N, so the order of the entries must not change.
void(*ScreenTriangle::TestSpanOpts[])(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread) =
{
&TestSpan<TriScreenDrawerModes::TestSpanOpt0>, // 0: no tests
&TestSpan<TriScreenDrawerModes::TestSpanOpt1>, // 1: depth test
&TestSpan<TriScreenDrawerModes::TestSpanOpt2>, // 2: stencil test
&TestSpan<TriScreenDrawerModes::TestSpanOpt3> // 3: depth + stencil test
};
#if 0
static void SortVertices(const TriDrawTriangleArgs *args, ScreenTriVertex **sortedVertices)
{
sortedVertices[0] = args->v1;
@ -1757,3 +2101,5 @@ void(*ScreenTriangle::TriangleDrawers[])(const TriDrawTriangleArgs *args, PolyTr
};
int ScreenTriangle::FuzzStart = 0;
#endif

View file

@ -127,6 +127,31 @@ enum class TriBlendMode
AddShadedTranslated
};
// Entry point of the software triangle rasterizer.
class ScreenTriangle
{
public:
// Rasterizes the triangle described by args (vertices v1, v2, v3 plus gradients)
// using the clip rectangle, draw arguments and buffers held by thread.
static void Draw(const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);
private:
// Table of span test functions; Draw indexes it with a bitmask of
// TriScreenDrawerModes::SWTRI_DepthTest / SWTRI_StencilTest.
static void(*TestSpanOpts[])(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread);
};
// Compile-time option tags for the span test function template.
namespace TriScreenDrawerModes
{
	// Bit flags selecting which per-pixel tests a span test function performs.
	enum SWTestSpan
	{
		SWTRI_DepthTest = 1 << 0,
		SWTRI_StencilTest = 1 << 1
	};

	// One tag type per flag combination; the numeric suffix equals the Flags value.
	struct TestSpanOpt0 { static const int Flags = 0; };
	struct TestSpanOpt1 { static const int Flags = SWTRI_DepthTest; };
	struct TestSpanOpt2 { static const int Flags = SWTRI_StencilTest; };
	struct TestSpanOpt3 { static const int Flags = SWTRI_DepthTest | SWTRI_StencilTest; };
}
#if 0
class ScreenTriangle
{
public:
@ -252,3 +277,5 @@ namespace TriScreenDrawerModes
struct TriangleOpt30 { static const int Flags = 30; };
struct TriangleOpt31 { static const int Flags = 31; };
}
#endif