mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-03-13 22:33:24 +00:00
- Added subdivision coverage testing and an alternative triangle renderer for speed comparison
This commit is contained in:
parent
52634dbec1
commit
afab50b489
2 changed files with 357 additions and 14 deletions
|
@ -47,10 +47,22 @@
|
|||
class TriangleBlock
|
||||
{
|
||||
public:
|
||||
TriangleBlock(const TriDrawTriangleArgs *args);
|
||||
void Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
TriangleBlock(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
void Render();
|
||||
|
||||
private:
|
||||
void RenderSubdivide(int x0, int y0, int x1, int y1);
|
||||
|
||||
// Selects, at compile time, how RenderBlock obtains the coverage masks for each block.
enum class CoverageModes { Full, Partial };
|
||||
// Tag type for RenderBlock<>: Mask0/Mask1 are set to all ones instead of calling CoverageTest.
struct CoverageFull { static const int Mode = (int)CoverageModes::Full; };
|
||||
// Tag type for RenderBlock<>: CoverageTest is called per block and blocks with empty masks are skipped.
struct CoveragePartial { static const int Mode = (int)CoverageModes::Partial; };
|
||||
|
||||
template<typename CoverageMode>
|
||||
void RenderBlock(int x0, int y0, int x1, int y1);
|
||||
|
||||
const TriDrawTriangleArgs *args;
|
||||
WorkerThreadData *thread;
|
||||
|
||||
// Block size, standard 8x8 (must be power of two)
|
||||
static const int q = 8;
|
||||
|
||||
|
@ -109,6 +121,14 @@ private:
|
|||
__m128i mDY31;
|
||||
#endif
|
||||
|
||||
// Outcome of AreaCoverageTest for a rectangular area tested against the three triangle edges.
enum class CoverageResult
|
||||
{
|
||||
full, // all four corners of the area passed all three edge tests
|
||||
partial, // neither fully inside nor rejected by a single edge
|
||||
none // all four corners failed the test of at least one edge
|
||||
};
|
||||
CoverageResult AreaCoverageTest(int x0, int y0, int x1, int y1);
|
||||
|
||||
void CoverageTest();
|
||||
void StencilEqualTest();
|
||||
void StencilGreaterEqualTest();
|
||||
|
@ -118,7 +138,7 @@ private:
|
|||
void DepthWrite(const TriDrawTriangleArgs *args);
|
||||
};
|
||||
|
||||
TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
||||
TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args, WorkerThreadData *thread) : args(args), thread(thread)
|
||||
{
|
||||
const TriVertex &v1 = *args->v1;
|
||||
const TriVertex &v2 = *args->v2;
|
||||
|
@ -189,9 +209,11 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
|||
return;
|
||||
}
|
||||
|
||||
// Start in corner of 8x8 block
|
||||
// Start and end in corner of 8x8 block
|
||||
minx &= ~(q - 1);
|
||||
miny &= ~(q - 1);
|
||||
maxx |= q - 1;
|
||||
maxy |= q - 1;
|
||||
|
||||
// Half-edge constants
|
||||
C1 = DY12 * X1 - DX12 * Y1;
|
||||
|
@ -225,13 +247,59 @@ TriangleBlock::TriangleBlock(const TriDrawTriangleArgs *args)
|
|||
#endif
|
||||
}
|
||||
|
||||
void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
void TriangleBlock::Render()
|
||||
{
|
||||
RenderSubdivide(minx / q, miny / q, (maxx + 1) / q, (maxy + 1) / q);
|
||||
}
|
||||
|
||||
void TriangleBlock::RenderSubdivide(int x0, int y0, int x1, int y1)
|
||||
{
|
||||
CoverageResult result = AreaCoverageTest(x0 * q, y0 * q, x1 * q, y1 * q);
|
||||
if (result == CoverageResult::full)
|
||||
{
|
||||
RenderBlock<CoverageFull>(x0 * q, y0 * q, x1 * q, y1 * q);
|
||||
}
|
||||
else if (result == CoverageResult::partial)
|
||||
{
|
||||
bool doneX = x1 - x0 <= 8;
|
||||
bool doneY = y1 - y0 <= 8;
|
||||
if (doneX && doneY)
|
||||
{
|
||||
RenderBlock<CoveragePartial>(x0 * q, y0 * q, x1 * q, y1 * q);
|
||||
}
|
||||
else
|
||||
{
|
||||
int midx = (x0 + x1) >> 1;
|
||||
int midy = (y0 + y1) >> 1;
|
||||
if (doneX)
|
||||
{
|
||||
RenderSubdivide(x0, y0, x1, midy);
|
||||
RenderSubdivide(x0, midy, x1, y1);
|
||||
}
|
||||
else if (doneY)
|
||||
{
|
||||
RenderSubdivide(x0, y0, midx, y1);
|
||||
RenderSubdivide(midx, y0, x1, y1);
|
||||
}
|
||||
else
|
||||
{
|
||||
RenderSubdivide(x0, y0, midx, midy);
|
||||
RenderSubdivide(midx, y0, x1, midy);
|
||||
RenderSubdivide(x0, midy, midx, y1);
|
||||
RenderSubdivide(midx, midy, x1, y1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
template<typename CoverageModeT>
|
||||
void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1)
|
||||
{
|
||||
// First block line for this thread
|
||||
int core = thread->core;
|
||||
int num_cores = thread->num_cores;
|
||||
int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores;
|
||||
int start_miny = miny + core_skip * q;
|
||||
int core_skip = (num_cores - ((y0 / q) - core) % num_cores) % num_cores;
|
||||
int start_miny = y0 + core_skip * q;
|
||||
|
||||
bool depthTest = args->uniforms->DepthTest();
|
||||
bool writeColor = args->uniforms->WriteColor();
|
||||
|
@ -242,16 +310,24 @@ void TriangleBlock::Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thre
|
|||
auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||
|
||||
// Loop through blocks
|
||||
for (int y = start_miny; y < maxy; y += q * num_cores)
|
||||
for (int y = start_miny; y < y1; y += q * num_cores)
|
||||
{
|
||||
for (int x = minx; x < maxx; x += q)
|
||||
for (int x = x0; x < x1; x += q)
|
||||
{
|
||||
X = x;
|
||||
Y = y;
|
||||
|
||||
CoverageTest();
|
||||
if (Mask0 == 0 && Mask1 == 0)
|
||||
continue;
|
||||
if (CoverageModeT::Mode == (int)CoverageModes::Full)
|
||||
{
|
||||
Mask0 = 0xffffffff;
|
||||
Mask1 = 0xffffffff;
|
||||
}
|
||||
else
|
||||
{
|
||||
CoverageTest();
|
||||
if (Mask0 == 0 && Mask1 == 0)
|
||||
continue;
|
||||
}
|
||||
|
||||
ClipTest();
|
||||
if (Mask0 == 0 && Mask1 == 0)
|
||||
|
@ -577,6 +653,47 @@ void TriangleBlock::StencilGreaterEqualTest()
|
|||
}
|
||||
}
|
||||
|
||||
// Classifies the pixel area [x0,x1) x [y0,y1) against the three triangle edges.
//
// The three half-space functions are evaluated at the four corner pixels of
// the area (the far corners are x1 - 1 and y1 - 1). For each edge a 4-bit mask
// records which corners passed: bit 0 = (x0,y0), bit 1 = (x1,y0),
// bit 2 = (x0,y1), bit 3 = (x1,y1).
//
// Returns:
//   none    - at least one edge rejects all four corners
//   full    - every edge accepts all four corners
//   partial - anything else
TriangleBlock::CoverageResult TriangleBlock::AreaCoverageTest(int x0, int y0, int x1, int y1)
{
    // Corner coordinates, shifted left by 4 (presumably the same sub-pixel
    // scale as the edge constants C1..C3 -- confirm against the constructor).
    const int left = x0 << 4;
    const int right = (x1 - 1) << 4;
    const int top = y0 << 4;
    const int bottom = (y1 - 1) << 4;

    // Corners listed in mask bit order
    const int cornerX[4] = { left, right, left, right };
    const int cornerY[4] = { top, top, bottom, bottom };

    int inside12 = 0;
    int inside23 = 0;
    int inside31 = 0;
    for (int i = 0; i < 4; i++)
    {
        const int bit = 1 << i;
        if (C1 + DX12 * cornerY[i] - DY12 * cornerX[i] > 0)
            inside12 |= bit;
        if (C2 + DX23 * cornerY[i] - DY23 * cornerX[i] > 0)
            inside23 |= bit;
        if (C3 + DX31 * cornerY[i] - DY31 * cornerX[i] > 0)
            inside31 |= bit;
    }

    // Skip the area when it lies entirely outside one edge
    if (inside12 == 0 || inside23 == 0 || inside31 == 0)
        return CoverageResult::none;

    // Accept the whole area when every corner is inside every edge
    if (inside12 == 0xf && inside23 == 0xf && inside31 == 0xf)
        return CoverageResult::full;

    return CoverageResult::partial;
}
|
||||
|
||||
#ifdef NO_SSE
|
||||
|
||||
void TriangleBlock::CoverageTest()
|
||||
|
@ -954,12 +1071,231 @@ void TriangleBlock::DepthWrite(const TriDrawTriangleArgs *args)
|
|||
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
|
||||
// Draws one triangle for the given worker thread using the TriangleBlock renderer
// (this is the variant selected by the surrounding "#if 1").
//
// NOTE(review): this span comes from a commit diff shown without +/- markers. The first
// two statements use the TriangleBlock(args) / Loop(args, thread) interface and the last
// two use the TriangleBlock(args, thread) / Render() interface -- presumably only the
// latter pair is live after the commit; confirm against the actual source file.
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
{
|
||||
TriangleBlock block(args);
|
||||
block.Loop(args, thread);
|
||||
TriangleBlock block(args, thread);
|
||||
block.Render();
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
static void SortVertices(const TriDrawTriangleArgs *args, TriVertex **sortedVertices)
|
||||
{
|
||||
sortedVertices[0] = args->v1;
|
||||
sortedVertices[1] = args->v2;
|
||||
sortedVertices[2] = args->v3;
|
||||
|
||||
if (sortedVertices[1]->y < sortedVertices[0]->y)
|
||||
std::swap(sortedVertices[0], sortedVertices[1]);
|
||||
if (sortedVertices[2]->y < sortedVertices[0]->y)
|
||||
std::swap(sortedVertices[0], sortedVertices[2]);
|
||||
if (sortedVertices[2]->y < sortedVertices[1]->y)
|
||||
std::swap(sortedVertices[1], sortedVertices[2]);
|
||||
}
|
||||
|
||||
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
{
|
||||
// Sort vertices by Y position
|
||||
TriVertex *sortedVertices[3];
|
||||
SortVertices(args, sortedVertices);
|
||||
|
||||
int clipright = args->clipright;
|
||||
int clipbottom = args->clipbottom;
|
||||
|
||||
// Ranges that different triangles edges are active
|
||||
int topY = (int)(sortedVertices[0]->y + 0.5f);
|
||||
int midY = (int)(sortedVertices[1]->y + 0.5f);
|
||||
int bottomY = (int)(sortedVertices[2]->y + 0.5f);
|
||||
|
||||
topY = MAX(topY, 0);
|
||||
midY = clamp(midY, 0, clipbottom);
|
||||
bottomY = MIN(bottomY, clipbottom);
|
||||
|
||||
if (topY >= bottomY)
|
||||
return;
|
||||
|
||||
// Find start/end X positions for each line covered by the triangle:
|
||||
|
||||
int leftEdge[MAXHEIGHT];
|
||||
int rightEdge[MAXHEIGHT];
|
||||
|
||||
float longDX = sortedVertices[2]->x - sortedVertices[0]->x;
|
||||
float longDY = sortedVertices[2]->y - sortedVertices[0]->y;
|
||||
float longStep = longDX / longDY;
|
||||
float longPos = sortedVertices[0]->x + longStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f;
|
||||
|
||||
if (topY < midY)
|
||||
{
|
||||
float shortDX = sortedVertices[1]->x - sortedVertices[0]->x;
|
||||
float shortDY = sortedVertices[1]->y - sortedVertices[0]->y;
|
||||
float shortStep = shortDX / shortDY;
|
||||
float shortPos = sortedVertices[0]->x + shortStep * (topY + 0.5f - sortedVertices[0]->y) + 0.5f;
|
||||
|
||||
for (int y = topY; y < midY; y++)
|
||||
{
|
||||
int x0 = (int)shortPos;
|
||||
int x1 = (int)longPos;
|
||||
if (x1 < x0) std::swap(x0, x1);
|
||||
x0 = clamp(x0, 0, clipright);
|
||||
x1 = clamp(x1, 0, clipright);
|
||||
|
||||
leftEdge[y] = x0;
|
||||
rightEdge[y] = x1;
|
||||
|
||||
shortPos += shortStep;
|
||||
longPos += longStep;
|
||||
}
|
||||
}
|
||||
|
||||
if (midY < bottomY)
|
||||
{
|
||||
float shortDX = sortedVertices[2]->x - sortedVertices[1]->x;
|
||||
float shortDY = sortedVertices[2]->y - sortedVertices[1]->y;
|
||||
float shortStep = shortDX / shortDY;
|
||||
float shortPos = sortedVertices[1]->x + shortStep * (midY + 0.5f - sortedVertices[1]->y) + 0.5f;
|
||||
|
||||
for (int y = midY; y < bottomY; y++)
|
||||
{
|
||||
int x0 = (int)shortPos;
|
||||
int x1 = (int)longPos;
|
||||
if (x1 < x0) std::swap(x0, x1);
|
||||
x0 = clamp(x0, 0, clipright);
|
||||
x1 = clamp(x1, 0, clipright);
|
||||
|
||||
leftEdge[y] = x0;
|
||||
rightEdge[y] = x1;
|
||||
|
||||
shortPos += shortStep;
|
||||
longPos += longStep;
|
||||
}
|
||||
}
|
||||
|
||||
// Make variables local so the compiler can optimize without worrying about pointer aliasing
|
||||
|
||||
bool depthTest = args->uniforms->DepthTest();
|
||||
bool writeColor = args->uniforms->WriteColor();
|
||||
bool writeStencil = args->uniforms->WriteStencil();
|
||||
bool writeDepth = args->uniforms->WriteDepth();
|
||||
|
||||
uint8_t stencilTestValue = args->uniforms->StencilTestValue();
|
||||
uint8_t stencilWriteValue = args->uniforms->StencilWriteValue();
|
||||
|
||||
int bmode = (int)args->uniforms->BlendMode();
|
||||
auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||
|
||||
uint8_t *dest = args->dest;
|
||||
uint8_t *stencilbuffer = args->stencilValues;
|
||||
uint32_t *stencilMasks = args->stencilMasks;
|
||||
float *zbuffer = args->zbuffer;
|
||||
int pitch = args->pitch;
|
||||
int stencilpitch = args->stencilPitch * 8;
|
||||
int color = ((int)(ptrdiff_t)args->uniforms->TexturePixels()) >> 2;
|
||||
|
||||
float v1X = args->v1->x;
|
||||
float v1Y = args->v1->y;
|
||||
float v1W = args->v1->w;
|
||||
float v1U = args->v1->u * v1W;
|
||||
float v1V = args->v1->v * v1W;
|
||||
float stepXW = args->gradientX.W;
|
||||
float stepXU = args->gradientX.U;
|
||||
float stepXV = args->gradientX.V;
|
||||
float stepYW = args->gradientY.W;
|
||||
float stepYU = args->gradientY.U;
|
||||
float stepYV = args->gradientY.V;
|
||||
int texWidth = args->uniforms->TextureWidth();
|
||||
int texHeight = args->uniforms->TextureHeight();
|
||||
const uint8_t *texPixels = args->uniforms->TexturePixels();
|
||||
auto colormaps = args->uniforms->BaseColormap();
|
||||
|
||||
bool is_fixed_light = args->uniforms->FixedLight();
|
||||
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
|
||||
uint32_t light = args->uniforms->Light();
|
||||
float shade = 2.0f - (light + 12.0f) / 128.0f;
|
||||
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
|
||||
light += light >> 7; // 255 -> 256
|
||||
|
||||
// Draw the triangle:
|
||||
|
||||
int num_cores = thread->num_cores;
|
||||
for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores)
|
||||
{
|
||||
int x0 = leftEdge[y];
|
||||
int x1 = rightEdge[y];
|
||||
|
||||
uint8_t *destLine = dest + pitch * y;
|
||||
uint8_t *stencilLine = stencilbuffer + stencilpitch * y;
|
||||
float *zbufferLine = zbuffer + stencilpitch * y;
|
||||
|
||||
if ((stencilMasks[y] & 0xffffff00) == 0xffffff00) // First time we draw a line we have to clear the stencil buffer
|
||||
{
|
||||
memset(stencilLine, stencilMasks[y] & 0xff, stencilpitch);
|
||||
stencilMasks[y] = 0;
|
||||
}
|
||||
|
||||
float posXW = v1W + stepXW * (x0 + (0.5f - v1X)) + stepYW * (y + (0.5f - v1Y));
|
||||
float posXU = v1U + stepXU * (x0 + (0.5f - v1X)) + stepYU * (y + (0.5f - v1Y));
|
||||
float posXV = v1V + stepXV * (x0 + (0.5f - v1X)) + stepYV * (y + (0.5f - v1Y));
|
||||
|
||||
int x = x0;
|
||||
while (x < x1)
|
||||
{
|
||||
bool processPixel = true;
|
||||
|
||||
if (!depthTest) // To do: make the stencil test use its own flag for comparison mode instead of abusing the depth test..
|
||||
{
|
||||
processPixel = stencilTestValue == stencilLine[x];
|
||||
}
|
||||
else
|
||||
{
|
||||
processPixel = stencilTestValue >= stencilLine[x] && zbufferLine[x] <= posXW;
|
||||
}
|
||||
|
||||
if (processPixel) // Pixel is visible (passed stencil and depth tests)
|
||||
{
|
||||
if (writeColor)
|
||||
{
|
||||
if (texPixels)
|
||||
{
|
||||
float rcpW = 0x01000000 / posXW;
|
||||
int32_t u = (int32_t)(posXU * rcpW);
|
||||
int32_t v = (int32_t)(posXV * rcpW);
|
||||
|
||||
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||
uint8_t fgcolor = texPixels[texelX * texHeight + texelY];
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
int lightshade = lightpos >> 8;
|
||||
|
||||
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
|
||||
uint8_t shadedfg = colormaps[lightshade + fgcolor];
|
||||
|
||||
if (fgcolor != 0)
|
||||
destLine[x] = shadedfg;
|
||||
}
|
||||
else
|
||||
{
|
||||
destLine[x] = color;
|
||||
}
|
||||
}
|
||||
if (writeStencil)
|
||||
stencilLine[x] = stencilWriteValue;
|
||||
if (writeDepth)
|
||||
zbufferLine[x] = posXW;
|
||||
}
|
||||
|
||||
posXW += stepXW;
|
||||
posXU += stepXU;
|
||||
posXV += stepXV;
|
||||
x++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void(*ScreenTriangle::TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *) =
|
||||
{
|
||||
&TriScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
|
||||
|
|
|
@ -32,6 +32,13 @@ struct WorkerThreadData
|
|||
{
|
||||
int32_t core;
|
||||
int32_t num_cores;
|
||||
|
||||
// The number of lines to skip to reach the first line to be rendered by this thread
|
||||
int skipped_by_thread(int first_line)
|
||||
{
|
||||
int core_skip = (num_cores - (first_line - core) % num_cores) % num_cores;
|
||||
return core_skip;
|
||||
}
|
||||
};
|
||||
|
||||
struct TriVertex
|
||||
|
|
Loading…
Reference in a new issue