Experiment with splitting triangle block coverage from block drawing

This commit is contained in:
Magnus Norddahl 2016-12-11 17:39:44 +01:00
parent c48b528824
commit 21341b92a5
2 changed files with 307 additions and 0 deletions

View File

@ -92,6 +92,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian
switch (variant) switch (variant)
{ {
default: default:
//case TriDrawVariant::DrawNormal: drawfunc = &ScreenTriangle::DrawFunc; break;
case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break; case TriDrawVariant::DrawNormal: drawfunc = dest_bgra ? llvm->TriDrawNormal32[bmode] : llvm->TriDrawNormal8[bmode]; break;
case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break; case TriDrawVariant::FillNormal: drawfunc = dest_bgra ? llvm->TriFillNormal32[bmode] : llvm->TriFillNormal8[bmode]; break;
case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break; case TriDrawVariant::DrawSubsector: drawfunc = dest_bgra ? llvm->TriDrawSubsector32[bmode] : llvm->TriDrawSubsector8[bmode]; break;
@ -569,3 +570,263 @@ void PolyVertexBuffer::Clear()
{ {
NextBufferVertex = 0; NextBufferVertex = 0;
} }
/////////////////////////////////////////////////////////////////////////////
void ScreenTriangle::Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
{
const TriVertex &v1 = *args->v1;
const TriVertex &v2 = *args->v2;
const TriVertex &v3 = *args->v3;
int clipright = args->clipright;
int clipbottom = args->clipbottom;
ScreenTriangleFullSpan *span = FullSpans;
ScreenTrianglePartialBlock *partial = PartialBlocks;
int curSpan = 0;
int curPartial = 0;
span->Length = 0;
// 28.4 fixed-point coordinates
const int Y1 = (int)round(16.0f * v1.y);
const int Y2 = (int)round(16.0f * v2.y);
const int Y3 = (int)round(16.0f * v3.y);
const int X1 = (int)round(16.0f * v1.x);
const int X2 = (int)round(16.0f * v2.x);
const int X3 = (int)round(16.0f * v3.x);
// Deltas
const int DX12 = X1 - X2;
const int DX23 = X2 - X3;
const int DX31 = X3 - X1;
const int DY12 = Y1 - Y2;
const int DY23 = Y2 - Y3;
const int DY31 = Y3 - Y1;
// Fixed-point deltas
const int FDX12 = DX12 << 4;
const int FDX23 = DX23 << 4;
const int FDX31 = DX31 << 4;
const int FDY12 = DY12 << 4;
const int FDY23 = DY23 << 4;
const int FDY31 = DY31 << 4;
// Bounding rectangle
int minx = MAX((MIN(MIN(X1, X2), X3) + 0xF) >> 4, 0);
int maxx = MIN((MAX(MAX(X1, X2), X3) + 0xF) >> 4, clipright - 1);
int miny = MAX((MIN(MIN(Y1, Y2), Y3) + 0xF) >> 4, 0);
int maxy = MIN((MAX(MAX(Y1, Y2), Y3) + 0xF) >> 4, clipbottom - 1);
if (minx >= maxx || miny >= maxy)
return;
// Block size, standard 8x8 (must be power of two)
const int q = 8;
// Start in corner of 8x8 block
minx &= ~(q - 1);
miny &= ~(q - 1);
// Half-edge constants
int C1 = DY12 * X1 - DX12 * Y1;
int C2 = DY23 * X2 - DX23 * Y2;
int C3 = DY31 * X3 - DX31 * Y3;
// Correct for fill convention
if (DY12 < 0 || (DY12 == 0 && DX12 > 0)) C1++;
if (DY23 < 0 || (DY23 == 0 && DX23 > 0)) C2++;
if (DY31 < 0 || (DY31 == 0 && DX31 > 0)) C3++;
// First block line for this thread
int core = thread->core;
int num_cores = thread->num_cores;
int core_skip = (num_cores - ((miny / q) - core) % num_cores) % num_cores;
miny += core_skip * q;
// Loop through blocks
for (int y = miny; y < maxy; y += q * num_cores)
{
for (int x = minx; x < maxx; x += q)
{
// Corners of block
int x0 = x << 4;
int x1 = (x + q - 1) << 4;
int y0 = y << 4;
int y1 = (y + q - 1) << 4;
// Evaluate half-space functions
bool a00 = C1 + DX12 * y0 - DY12 * x0 > 0;
bool a10 = C1 + DX12 * y0 - DY12 * x1 > 0;
bool a01 = C1 + DX12 * y1 - DY12 * x0 > 0;
bool a11 = C1 + DX12 * y1 - DY12 * x1 > 0;
int a = (a00 << 0) | (a10 << 1) | (a01 << 2) | (a11 << 3);
bool b00 = C2 + DX23 * y0 - DY23 * x0 > 0;
bool b10 = C2 + DX23 * y0 - DY23 * x1 > 0;
bool b01 = C2 + DX23 * y1 - DY23 * x0 > 0;
bool b11 = C2 + DX23 * y1 - DY23 * x1 > 0;
int b = (b00 << 0) | (b10 << 1) | (b01 << 2) | (b11 << 3);
bool c00 = C3 + DX31 * y0 - DY31 * x0 > 0;
bool c10 = C3 + DX31 * y0 - DY31 * x1 > 0;
bool c01 = C3 + DX31 * y1 - DY31 * x0 > 0;
bool c11 = C3 + DX31 * y1 - DY31 * x1 > 0;
int c = (c00 << 0) | (c10 << 1) | (c01 << 2) | (c11 << 3);
// Skip block when outside an edge
if (a == 0x0 || b == 0x0 || c == 0x0) continue;
// Accept whole block when totally covered
if (a == 0xF && b == 0xF && c == 0xF && x + q <= clipright && y + q <= clipbottom)
{
if (span->Length != 0)
{
span->Length++;
}
else
{
span->X = x;
span->Y = y;
span->Length = 1;
}
}
else // Partially covered block
{
int CY1 = C1 + DX12 * y0 - DY12 * x0;
int CY2 = C2 + DX23 * y0 - DY23 * x0;
int CY3 = C3 + DX31 * y0 - DY31 * x0;
uint64_t mask = 0;
for (int iy = 0; iy < q; iy++)
{
int CX1 = CY1;
int CX2 = CY2;
int CX3 = CY3;
for (int ix = x; ix < x + q; ix++)
{
bool covered = (CX1 > 0 && CX2 > 0 && CX3 > 0 && ix < clipright && iy < clipbottom);
mask <<= 1;
mask |= covered;
CX1 -= FDY12;
CX2 -= FDY23;
CX3 -= FDY31;
}
CY1 += FDX12;
CY2 += FDX23;
CY3 += FDX31;
}
if (mask != 0xffffffffffffffffLL)
{
if (span->Length > 0)
{
curSpan++;
span++;
span->Length = 0;
}
partial->X = x;
partial->Y = y;
partial->Mask0 = (uint32_t)(mask >> 32);
partial->Mask1 = (uint32_t)mask;
partial++;
curPartial++;
}
else if (span->Length != 0)
{
span->Length++;
}
else
{
span->X = x;
span->Y = y;
span->Length = 1;
}
}
}
if (span->Length > 0)
{
curSpan++;
span++;
span->Length = 0;
}
}
NumFullSpans = curSpan;
NumPartialBlocks = curPartial;
}
void ScreenTriangle::Draw(const TriDrawTriangleArgs *args)
{
for (int i = 0; i < NumFullSpans; i++)
{
const auto &span = FullSpans[i];
uint32_t *dest = (uint32_t*)args->dest + span.X + span.Y * args->pitch;
int pitch = args->pitch;
int width = span.Length * 8;
int height = 8;
for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
dest[x] = 0xffffffff;
}
dest += pitch;
}
}
for (int i = 0; i < NumPartialBlocks; i++)
{
const auto &block = PartialBlocks[i];
uint32_t *dest = (uint32_t*)args->dest + block.X + block.Y * args->pitch;
int pitch = args->pitch;
uint32_t mask0 = block.Mask0;
uint32_t mask1 = block.Mask1;
for (int y = 0; y < 4; y++)
{
for (int x = 0; x < 8; x++)
{
if (mask0 & (1<<31))
dest[x] = 0xffff0000;
mask0 <<= 1;
}
dest += pitch;
}
for (int y = 4; y < 8; y++)
{
for (int x = 0; x < 8; x++)
{
if (mask1 & (1<<31))
dest[x] = 0xffff0000;
mask1 <<= 1;
}
dest += pitch;
}
}
}
void ScreenTriangle::DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
{
static ScreenTriangle triangle[8];
triangle[thread->core].Setup(args, thread);
triangle[thread->core].Draw(args);
}
ScreenTriangle::ScreenTriangle()
{
FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
FullSpans = FullSpansBuffer.data();
PartialBlocks = PartialBlocksBuffer.data();
}

View File

@ -264,3 +264,49 @@ public:
static TriVertex *GetVertices(int count); static TriVertex *GetVertices(int count);
static void Clear(); static void Clear();
}; };
struct ScreenTriangleBlock
{
float W;
float Varying[TriVertex::NumVarying];
};
struct ScreenTriangleFullSpan
{
uint16_t X;
uint16_t Y;
uint32_t Length;
};
struct ScreenTrianglePartialBlock
{
uint16_t X;
uint16_t Y;
uint32_t Mask0;
uint32_t Mask1;
};
class ScreenTriangle
{
public:
static void DrawFunc(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
ScreenTriangle();
void Setup(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
void Draw(const TriDrawTriangleArgs *args);
ScreenTriangleFullSpan *FullSpans;
ScreenTrianglePartialBlock *PartialBlocks;
int NumFullSpans;
int NumPartialBlocks;
int StartX;
int StartY;
ScreenTriangleBlock Start;
ScreenTriangleBlock GradientX;
ScreenTriangleBlock GradientY;
private:
std::vector<ScreenTriangleFullSpan> FullSpansBuffer;
std::vector<ScreenTrianglePartialBlock> PartialBlocksBuffer;
};