- clean up the triangle setup functions

This commit is contained in:
Magnus Norddahl 2017-04-01 03:44:45 +02:00
parent f48af606df
commit 6db89a2ce1
7 changed files with 785 additions and 1034 deletions

View file

@ -268,7 +268,7 @@ template<typename BlendT, typename SamplerT>
class TriScreenDrawer32 class TriScreenDrawer32
{ {
public: public:
static void Execute(const TriDrawTriangleArgs *args, WorkerThreadData *thread) static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{ {
using namespace TriScreenDrawerModes; using namespace TriScreenDrawerModes;
@ -281,44 +281,37 @@ public:
if (is_simple_shade) if (is_simple_shade)
{ {
if (is_nearest_filter) if (is_nearest_filter)
Loop<SimpleShade, NearestFilter>(args, thread); DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
else else
Loop<SimpleShade, LinearFilter>(args, thread); DrawBlock<SimpleShade, LinearFilter>(x, y, mask0, mask1, args);
} }
else else
{ {
if (is_nearest_filter) if (is_nearest_filter)
Loop<AdvancedShade, NearestFilter>(args, thread); DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
else else
Loop<AdvancedShade, LinearFilter>(args, thread); DrawBlock<AdvancedShade, LinearFilter>(x, y, mask0, mask1, args);
} }
} }
else // no linear filtering for translated, shaded, stencil, fill or skycap else // no linear filtering for translated, shaded, stencil, fill or skycap
{ {
if (is_simple_shade) if (is_simple_shade)
{ {
Loop<SimpleShade, NearestFilter>(args, thread); DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
} }
else else
{ {
Loop<AdvancedShade, NearestFilter>(args, thread); DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
} }
} }
} }
private: private:
template<typename ShadeModeT, typename FilterModeT> template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread) FORCEINLINE static void VECTORCALL DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{ {
using namespace TriScreenDrawerModes; using namespace TriScreenDrawerModes;
int numSpans = thread->NumFullSpans;
auto fullSpans = thread->FullSpans;
int numBlocks = thread->NumPartialBlocks;
auto partialBlocks = thread->PartialBlocks;
int startX = thread->StartX;
int startY = thread->StartY;
bool is_fixed_light = args->uniforms->FixedLight(); bool is_fixed_light = args->uniforms->FixedLight();
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
uint32_t srcalpha = args->uniforms->SrcAlpha(); uint32_t srcalpha = args->uniforms->SrcAlpha();
@ -330,20 +323,24 @@ private:
const TriVertex &v3 = *args->v3; const TriVertex &v3 = *args->v3;
ScreenTriangleStepVariables gradientX; ScreenTriangleStepVariables gradientX;
ScreenTriangleStepVariables gradientY; ScreenTriangleStepVariables gradientY;
ScreenTriangleStepVariables start; ScreenTriangleStepVariables blockPosY;
gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w); gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w); gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w); gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w); gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
start.U = v1.u * v1.w + gradientX.U * (startX - v1.x) + gradientY.U * (startY - v1.y); blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
start.V = v1.v * v1.w + gradientX.V * (startX - v1.x) + gradientY.V * (startY - v1.y); blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
gradientX.W *= 8.0f;
gradientX.U *= 8.0f;
gradientX.V *= 8.0f;
// Output // Output
uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; uint32_t * RESTRICT destOrg = (uint32_t*)args->dest;
int pitch = args->pitch; int pitch = args->pitch;
uint32_t *dest = destOrg + destX + destY * pitch;
// Light // Light
uint32_t light = args->uniforms->Light(); uint32_t light = args->uniforms->Light();
@ -388,35 +385,21 @@ private:
desaturate = 0; desaturate = 0;
} }
for (int i = 0; i < numSpans; i++) if (mask0 == 0xffffffff && mask1 == 0xffffffff)
{ {
const auto &span = fullSpans[i]; for (int y = 0; y < 8; y++)
uint32_t *dest = destOrg + span.X + span.Y * pitch;
int width = span.Length;
int height = 8;
ScreenTriangleStepVariables blockPosY;
blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY);
blockPosY.U = start.U + gradientX.U * (span.X - startX) + gradientY.U * (span.Y - startY);
blockPosY.V = start.V + gradientX.V * (span.X - startX) + gradientY.V * (span.Y - startY);
for (int y = 0; y < height; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
for (int x = 0; x < width; x++) ScreenTriangleStepVariables blockPosX = blockPosY;
{ blockPosX.W += gradientX.W;
blockPosX.W += gradientX.W * 8; blockPosX.U += gradientX.U;
blockPosX.U += gradientX.U * 8; blockPosX.V += gradientX.V;
blockPosX.V += gradientX.V * 8;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);
@ -433,7 +416,7 @@ private:
// Load bgcolor // Load bgcolor
__m128i bgcolor; __m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque) if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + x * 8 + ix * 2)), _mm_setzero_si128()); bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + ix * 2)), _mm_setzero_si128());
else else
bgcolor = _mm_setzero_si128(); bgcolor = _mm_setzero_si128();
@ -473,8 +456,7 @@ private:
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); __m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result // Store result
_mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor); _mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
}
} }
blockPosY.W += gradientY.W; blockPosY.W += gradientY.W;
@ -484,35 +466,22 @@ private:
dest += pitch; dest += pitch;
} }
} }
else
for (int i = 0; i < numBlocks; i++)
{ {
const auto &block = partialBlocks[i];
ScreenTriangleStepVariables blockPosY;
blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY);
blockPosY.U = start.U + gradientX.U * (block.X - startX) + gradientY.U * (block.Y - startY);
blockPosY.V = start.V + gradientX.V * (block.X - startX) + gradientY.V * (block.Y - startY);
uint32_t *dest = destOrg + block.X + block.Y * pitch;
uint32_t mask0 = block.Mask0;
uint32_t mask1 = block.Mask1;
// mask0 loop: // mask0 loop:
for (int y = 0; y < 4; y++) for (int y = 0; y < 4; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
blockPosX.W += gradientX.W * 8; ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.U += gradientX.U * 8; blockPosX.W += gradientX.W;
blockPosX.V += gradientX.V * 8; blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);
@ -590,18 +559,17 @@ private:
// mask1 loop: // mask1 loop:
for (int y = 0; y < 4; y++) for (int y = 0; y < 4; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
blockPosX.W += gradientX.W * 8; ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.U += gradientX.U * 8; blockPosX.W += gradientX.W;
blockPosX.V += gradientX.V * 8; blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);

View file

@ -209,17 +209,10 @@ template<typename BlendT, typename SamplerT>
class TriScreenDrawer8 class TriScreenDrawer8
{ {
public: public:
static void Execute(const TriDrawTriangleArgs *args, WorkerThreadData *thread) static void Execute(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{ {
using namespace TriScreenDrawerModes; using namespace TriScreenDrawerModes;
int numSpans = thread->NumFullSpans;
auto fullSpans = thread->FullSpans;
int numBlocks = thread->NumPartialBlocks;
auto partialBlocks = thread->PartialBlocks;
int startX = thread->StartX;
int startY = thread->StartY;
bool is_fixed_light = args->uniforms->FixedLight(); bool is_fixed_light = args->uniforms->FixedLight();
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
auto colormaps = args->uniforms->BaseColormap(); auto colormaps = args->uniforms->BaseColormap();
@ -232,20 +225,24 @@ public:
const TriVertex &v3 = *args->v3; const TriVertex &v3 = *args->v3;
ScreenTriangleStepVariables gradientX; ScreenTriangleStepVariables gradientX;
ScreenTriangleStepVariables gradientY; ScreenTriangleStepVariables gradientY;
ScreenTriangleStepVariables start; ScreenTriangleStepVariables blockPosY;
gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w); gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w); gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w); gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w); gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w); gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y); blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
start.U = v1.u * v1.w + gradientX.U * (startX - v1.x) + gradientY.U * (startY - v1.y); blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
start.V = v1.v * v1.w + gradientX.V * (startX - v1.x) + gradientY.V * (startY - v1.y); blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
gradientX.W *= 8.0f;
gradientX.U *= 8.0f;
gradientX.V *= 8.0f;
// Output // Output
uint8_t * RESTRICT destOrg = args->dest; uint8_t * RESTRICT destOrg = args->dest;
int pitch = args->pitch; int pitch = args->pitch;
uint8_t *dest = destOrg + destX + destY * pitch;
// Light // Light
uint32_t light = args->uniforms->Light(); uint32_t light = args->uniforms->Light();
@ -260,35 +257,21 @@ public:
uint32_t texWidth = args->uniforms->TextureWidth(); uint32_t texWidth = args->uniforms->TextureWidth();
uint32_t texHeight = args->uniforms->TextureHeight(); uint32_t texHeight = args->uniforms->TextureHeight();
for (int i = 0; i < numSpans; i++) if (mask0 == 0xffffffff && mask1 == 0xffffffff)
{ {
const auto &span = fullSpans[i]; for (int y = 0; y < 8; y++)
uint8_t *dest = destOrg + span.X + span.Y * pitch;
int width = span.Length;
int height = 8;
ScreenTriangleStepVariables blockPosY;
blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY);
blockPosY.U = start.U + gradientX.U * (span.X - startX) + gradientY.U * (span.Y - startY);
blockPosY.V = start.V + gradientX.V * (span.X - startX) + gradientY.V * (span.Y - startY);
for (int y = 0; y < height; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
for (int x = 0; x < width; x++) ScreenTriangleStepVariables blockPosX = blockPosY;
{ blockPosX.W += gradientX.W;
blockPosX.W += gradientX.W * 8; blockPosX.U += gradientX.U;
blockPosX.U += gradientX.U * 8; blockPosX.V += gradientX.V;
blockPosX.V += gradientX.V * 8;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);
@ -303,15 +286,14 @@ public:
for (int ix = 0; ix < 8; ix++) for (int ix = 0; ix < 8; ix++)
{ {
int lightshade = lightpos >> 8; int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x * 8 + ix]; uint8_t bgcolor = dest[ix];
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation); uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight); uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x * 8 + ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); dest[ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
lightpos += lightstep; lightpos += lightstep;
} }
}
blockPosY.W += gradientY.W; blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U; blockPosY.U += gradientY.U;
@ -320,35 +302,22 @@ public:
dest += pitch; dest += pitch;
} }
} }
else
for (int i = 0; i < numBlocks; i++)
{ {
const auto &block = partialBlocks[i];
ScreenTriangleStepVariables blockPosY;
blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY);
blockPosY.U = start.U + gradientX.U * (block.X - startX) + gradientY.U * (block.Y - startY);
blockPosY.V = start.V + gradientX.V * (block.X - startX) + gradientY.V * (block.Y - startY);
uint8_t *dest = destOrg + block.X + block.Y * pitch;
uint32_t mask0 = block.Mask0;
uint32_t mask1 = block.Mask1;
// mask0 loop: // mask0 loop:
for (int y = 0; y < 4; y++) for (int y = 0; y < 4; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
blockPosX.W += gradientX.W * 8; ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.U += gradientX.U * 8; blockPosX.W += gradientX.W;
blockPosX.V += gradientX.V * 8; blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);
@ -388,18 +357,17 @@ public:
// mask1 loop: // mask1 loop:
for (int y = 0; y < 4; y++) for (int y = 0; y < 4; y++)
{ {
ScreenTriangleStepVariables blockPosX = blockPosY; float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
float rcpW = 0x01000000 / blockPosX.W; int32_t posV = (int32_t)(blockPosY.V * rcpW);
int32_t posU = (int32_t)(blockPosX.U * rcpW);
int32_t posV = (int32_t)(blockPosX.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
blockPosX.W += gradientX.W * 8; ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.U += gradientX.U * 8; blockPosX.W += gradientX.W;
blockPosX.V += gradientX.V * 8; blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W; rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW); int32_t nextU = (int32_t)(blockPosX.U * rcpW);

View file

@ -38,8 +38,6 @@
#include "swrenderer/drawers/r_draw_rgba.h" #include "swrenderer/drawers/r_draw_rgba.h"
#include "screen_triangle.h" #include "screen_triangle.h"
CVAR(Bool, r_debug_trisetup, false, 0);
int PolyTriangleDrawer::viewport_x; int PolyTriangleDrawer::viewport_x;
int PolyTriangleDrawer::viewport_y; int PolyTriangleDrawer::viewport_y;
int PolyTriangleDrawer::viewport_width; int PolyTriangleDrawer::viewport_width;
@ -90,33 +88,13 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
if (drawargs.VertexCount() < 3) if (drawargs.VertexCount() < 3)
return; return;
PolyDrawFuncPtr drawfuncs[4];
int num_drawfuncs = 0;
drawfuncs[num_drawfuncs++] = drawargs.SubsectorTest() ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal;
if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawal
{
int bmode = (int)drawargs.BlendMode();
if (drawargs.WriteColor())
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
}
if (drawargs.WriteStencil())
drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite;
if (drawargs.WriteSubsector())
drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite;
TriDrawTriangleArgs args; TriDrawTriangleArgs args;
args.dest = dest; args.dest = dest;
args.pitch = dest_pitch; args.pitch = dest_pitch;
args.clipleft = 0;
args.clipright = dest_width; args.clipright = dest_width;
args.cliptop = 0;
args.clipbottom = dest_height; args.clipbottom = dest_height;
args.uniforms = &drawargs; args.uniforms = &drawargs;
args.destBgra = dest_bgra;
args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth(); args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth();
args.stencilValues = PolyStencilBuffer::Instance()->Values(); args.stencilValues = PolyStencilBuffer::Instance()->Values();
args.stencilMasks = PolyStencilBuffer::Instance()->Masks(); args.stencilMasks = PolyStencilBuffer::Instance()->Masks();
@ -133,7 +111,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
{ {
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
vert[j] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++)); vert[j] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); draw_shaded_triangle(vert, ccw, &args, thread);
} }
} }
else if (drawargs.DrawMode() == PolyDrawMode::TriangleFan) else if (drawargs.DrawMode() == PolyDrawMode::TriangleFan)
@ -143,7 +121,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
for (int i = 2; i < vcount; i++) for (int i = 2; i < vcount; i++)
{ {
vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++)); vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); draw_shaded_triangle(vert, ccw, &args, thread);
vert[1] = vert[2]; vert[1] = vert[2];
} }
} }
@ -154,7 +132,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
for (int i = 2; i < vcount; i++) for (int i = 2; i < vcount; i++)
{ {
vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++)); vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs); draw_shaded_triangle(vert, ccw, &args, thread);
vert[0] = vert[1]; vert[0] = vert[1];
vert[1] = vert[2]; vert[1] = vert[2];
ccw = !ccw; ccw = !ccw;
@ -173,7 +151,7 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip,
return sv; return sv;
} }
void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs) void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread)
{ {
// Cull, clip and generate additional vertices as needed // Cull, clip and generate additional vertices as needed
TriVertex clippedvert[max_additional_vertices]; TriVertex clippedvert[max_additional_vertices];
@ -250,8 +228,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
args->v2 = &clippedvert[i - 1]; args->v2 = &clippedvert[i - 1];
args->v3 = &clippedvert[i - 2]; args->v3 = &clippedvert[i - 2];
for (int j = 0; j < num_drawfuncs; j++) ScreenTriangle::Draw(args, thread);
drawfuncs[j](args, thread);
} }
} }
else else
@ -262,8 +239,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
args->v2 = &clippedvert[i - 1]; args->v2 = &clippedvert[i - 1];
args->v3 = &clippedvert[i]; args->v3 = &clippedvert[i];
for (int j = 0; j < num_drawfuncs; j++) ScreenTriangle::Draw(args, thread);
drawfuncs[j](args, thread);
} }
} }
} }
@ -444,8 +420,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread)
WorkerThreadData thread_data; WorkerThreadData thread_data;
thread_data.core = thread->core; thread_data.core = thread->core;
thread_data.num_cores = thread->num_cores; thread_data.num_cores = thread->num_cores;
thread_data.FullSpans = thread->FullSpansBuffer.data();
thread_data.PartialBlocks = thread->PartialBlocksBuffer.data();
PolyTriangleDrawer::draw_arrays(args, &thread_data); PolyTriangleDrawer::draw_arrays(args, &thread_data);
} }

View file

@ -46,7 +46,7 @@ public:
private: private:
static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v); static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v);
static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread); static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread);
static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs); static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread);
static int clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert); static int clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert);

File diff suppressed because it is too large Load diff

View file

@ -28,34 +28,10 @@
class FString; class FString;
class PolyDrawArgs; class PolyDrawArgs;
struct TriFullSpan
{
uint16_t X;
uint16_t Y;
uint32_t Length;
};
struct TriPartialBlock
{
uint16_t X;
uint16_t Y;
uint32_t Mask0;
uint32_t Mask1;
};
struct WorkerThreadData struct WorkerThreadData
{ {
int32_t core; int32_t core;
int32_t num_cores; int32_t num_cores;
uint32_t *temp;
// Triangle working data:
TriFullSpan *FullSpans;
TriPartialBlock *PartialBlocks;
uint32_t NumFullSpans;
uint32_t NumPartialBlocks;
int32_t StartX;
int32_t StartY;
}; };
struct TriVertex struct TriVertex
@ -74,15 +50,14 @@ struct TriDrawTriangleArgs
TriVertex *v1; TriVertex *v1;
TriVertex *v2; TriVertex *v2;
TriVertex *v3; TriVertex *v3;
int32_t clipleft;
int32_t clipright; int32_t clipright;
int32_t cliptop;
int32_t clipbottom; int32_t clipbottom;
uint8_t *stencilValues; uint8_t *stencilValues;
uint32_t *stencilMasks; uint32_t *stencilMasks;
int32_t stencilPitch; int32_t stencilPitch;
uint32_t *subsectorGBuffer; uint32_t *subsectorGBuffer;
const PolyDrawArgs *uniforms; const PolyDrawArgs *uniforms;
bool destBgra;
}; };
class RectDrawArgs; class RectDrawArgs;
@ -116,13 +91,10 @@ enum class TriBlendMode
class ScreenTriangle class ScreenTriangle
{ {
public: public:
static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void(*TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *); static void(*TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
static void(*TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *); static void(*TriDrawers32[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *); static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *); static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
}; };

View file

@ -23,7 +23,6 @@
#pragma once #pragma once
#include "r_draw.h" #include "r_draw.h"
#include "polyrenderer/drawers/screen_triangle.h"
#include <vector> #include <vector>
#include <memory> #include <memory>
#include <thread> #include <thread>
@ -37,12 +36,6 @@ EXTERN_CVAR(Bool, r_multithreaded)
class DrawerThread class DrawerThread
{ {
public: public:
DrawerThread()
{
FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
}
std::thread thread; std::thread thread;
size_t current_queue = 0; size_t current_queue = 0;
@ -55,10 +48,6 @@ public:
// Working buffer used by the tilted (sloped) span drawer // Working buffer used by the tilted (sloped) span drawer
const uint8_t *tiltlighting[MAXWIDTH]; const uint8_t *tiltlighting[MAXWIDTH];
// Working buffer used by the triangler drawer
std::vector<TriFullSpan> FullSpansBuffer;
std::vector<TriPartialBlock> PartialBlocksBuffer;
// Checks if a line is rendered by this thread // Checks if a line is rendered by this thread
bool line_skipped_by_thread(int line) bool line_skipped_by_thread(int line)
{ {