mirror of
https://github.com/ZDoom/qzdoom.git
synced 2025-01-21 00:20:56 +00:00
- clean up the triangle setup functions
This commit is contained in:
parent
f48af606df
commit
6db89a2ce1
7 changed files with 785 additions and 1034 deletions
|
@ -268,7 +268,7 @@ template<typename BlendT, typename SamplerT>
|
|||
class TriScreenDrawer32
|
||||
{
|
||||
public:
|
||||
static void Execute(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
|
||||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
|
@ -281,44 +281,37 @@ public:
|
|||
if (is_simple_shade)
|
||||
{
|
||||
if (is_nearest_filter)
|
||||
Loop<SimpleShade, NearestFilter>(args, thread);
|
||||
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
|
||||
else
|
||||
Loop<SimpleShade, LinearFilter>(args, thread);
|
||||
DrawBlock<SimpleShade, LinearFilter>(x, y, mask0, mask1, args);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (is_nearest_filter)
|
||||
Loop<AdvancedShade, NearestFilter>(args, thread);
|
||||
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
|
||||
else
|
||||
Loop<AdvancedShade, LinearFilter>(args, thread);
|
||||
DrawBlock<AdvancedShade, LinearFilter>(x, y, mask0, mask1, args);
|
||||
}
|
||||
}
|
||||
else // no linear filtering for translated, shaded, stencil, fill or skycap
|
||||
{
|
||||
if (is_simple_shade)
|
||||
{
|
||||
Loop<SimpleShade, NearestFilter>(args, thread);
|
||||
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
|
||||
}
|
||||
else
|
||||
{
|
||||
Loop<AdvancedShade, NearestFilter>(args, thread);
|
||||
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename ShadeModeT, typename FilterModeT>
|
||||
FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
FORCEINLINE static void VECTORCALL DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
|
||||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
int numSpans = thread->NumFullSpans;
|
||||
auto fullSpans = thread->FullSpans;
|
||||
int numBlocks = thread->NumPartialBlocks;
|
||||
auto partialBlocks = thread->PartialBlocks;
|
||||
int startX = thread->StartX;
|
||||
int startY = thread->StartY;
|
||||
|
||||
bool is_fixed_light = args->uniforms->FixedLight();
|
||||
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
|
||||
uint32_t srcalpha = args->uniforms->SrcAlpha();
|
||||
|
@ -330,20 +323,24 @@ private:
|
|||
const TriVertex &v3 = *args->v3;
|
||||
ScreenTriangleStepVariables gradientX;
|
||||
ScreenTriangleStepVariables gradientY;
|
||||
ScreenTriangleStepVariables start;
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||
gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||
gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
|
||||
gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
|
||||
gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
|
||||
gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
|
||||
start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y);
|
||||
start.U = v1.u * v1.w + gradientX.U * (startX - v1.x) + gradientY.U * (startY - v1.y);
|
||||
start.V = v1.v * v1.w + gradientX.V * (startX - v1.x) + gradientY.V * (startY - v1.y);
|
||||
blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
|
||||
blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
|
||||
blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
|
||||
gradientX.W *= 8.0f;
|
||||
gradientX.U *= 8.0f;
|
||||
gradientX.V *= 8.0f;
|
||||
|
||||
// Output
|
||||
uint32_t * RESTRICT destOrg = (uint32_t*)args->dest;
|
||||
int pitch = args->pitch;
|
||||
uint32_t *dest = destOrg + destX + destY * pitch;
|
||||
|
||||
// Light
|
||||
uint32_t light = args->uniforms->Light();
|
||||
|
@ -388,93 +385,78 @@ private:
|
|||
desaturate = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < numSpans; i++)
|
||||
if (mask0 == 0xffffffff && mask1 == 0xffffffff)
|
||||
{
|
||||
const auto &span = fullSpans[i];
|
||||
|
||||
uint32_t *dest = destOrg + span.X + span.Y * pitch;
|
||||
int width = span.Length;
|
||||
int height = 8;
|
||||
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY);
|
||||
blockPosY.U = start.U + gradientX.U * (span.X - startX) + gradientY.U * (span.Y - startY);
|
||||
blockPosY.V = start.V + gradientX.V * (span.X - startX) + gradientY.V * (span.Y - startY);
|
||||
|
||||
for (int y = 0; y < height; y++)
|
||||
for (int y = 0; y < 8; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
for (int x = 0; x < width; x++)
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
|
||||
int32_t stepU = (nextU - posU) / 8;
|
||||
int32_t stepV = (nextV - posV) / 8;
|
||||
|
||||
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
for (int ix = 0; ix < 4; ix++)
|
||||
{
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
// Load bgcolor
|
||||
__m128i bgcolor;
|
||||
if (BlendT::Mode != (int)BlendModes::Opaque)
|
||||
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + ix * 2)), _mm_setzero_si128());
|
||||
else
|
||||
bgcolor = _mm_setzero_si128();
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
|
||||
int32_t stepU = (nextU - posU) / 8;
|
||||
int32_t stepV = (nextV - posV) / 8;
|
||||
// Sample fgcolor
|
||||
unsigned int ifgcolor[2], ifgshade[2];
|
||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
|
||||
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
|
||||
for (int ix = 0; ix < 4; ix++)
|
||||
// Setup light
|
||||
int lightpos0 = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
int lightpos1 = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
__m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0);
|
||||
|
||||
__m128i shade_fade_lit;
|
||||
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
|
||||
{
|
||||
// Load bgcolor
|
||||
__m128i bgcolor;
|
||||
if (BlendT::Mode != (int)BlendModes::Opaque)
|
||||
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + x * 8 + ix * 2)), _mm_setzero_si128());
|
||||
else
|
||||
bgcolor = _mm_setzero_si128();
|
||||
|
||||
// Sample fgcolor
|
||||
unsigned int ifgcolor[2], ifgshade[2];
|
||||
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
|
||||
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
|
||||
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
|
||||
// Setup light
|
||||
int lightpos0 = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
int lightpos1 = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
__m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0);
|
||||
|
||||
__m128i shade_fade_lit;
|
||||
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
|
||||
{
|
||||
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
|
||||
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
|
||||
}
|
||||
else
|
||||
{
|
||||
shade_fade_lit = _mm_setzero_si128();
|
||||
}
|
||||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
_mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor);
|
||||
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
|
||||
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
|
||||
}
|
||||
else
|
||||
{
|
||||
shade_fade_lit = _mm_setzero_si128();
|
||||
}
|
||||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
|
||||
}
|
||||
|
||||
blockPosY.W += gradientY.W;
|
||||
|
@ -484,35 +466,22 @@ private:
|
|||
dest += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
else
|
||||
{
|
||||
const auto &block = partialBlocks[i];
|
||||
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY);
|
||||
blockPosY.U = start.U + gradientX.U * (block.X - startX) + gradientY.U * (block.Y - startY);
|
||||
blockPosY.V = start.V + gradientX.V * (block.X - startX) + gradientY.V * (block.Y - startY);
|
||||
|
||||
uint32_t *dest = destOrg + block.X + block.Y * pitch;
|
||||
uint32_t mask0 = block.Mask0;
|
||||
uint32_t mask1 = block.Mask1;
|
||||
|
||||
// mask0 loop:
|
||||
for (int y = 0; y < 4; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
|
@ -590,18 +559,17 @@ private:
|
|||
// mask1 loop:
|
||||
for (int y = 0; y < 4; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
|
|
|
@ -209,17 +209,10 @@ template<typename BlendT, typename SamplerT>
|
|||
class TriScreenDrawer8
|
||||
{
|
||||
public:
|
||||
static void Execute(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
static void Execute(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
|
||||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
int numSpans = thread->NumFullSpans;
|
||||
auto fullSpans = thread->FullSpans;
|
||||
int numBlocks = thread->NumPartialBlocks;
|
||||
auto partialBlocks = thread->PartialBlocks;
|
||||
int startX = thread->StartX;
|
||||
int startY = thread->StartY;
|
||||
|
||||
bool is_fixed_light = args->uniforms->FixedLight();
|
||||
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
|
||||
auto colormaps = args->uniforms->BaseColormap();
|
||||
|
@ -232,20 +225,24 @@ public:
|
|||
const TriVertex &v3 = *args->v3;
|
||||
ScreenTriangleStepVariables gradientX;
|
||||
ScreenTriangleStepVariables gradientY;
|
||||
ScreenTriangleStepVariables start;
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
gradientX.W = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||
gradientY.W = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.w, v2.w, v3.w);
|
||||
gradientX.U = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
|
||||
gradientY.U = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.u * v1.w, v2.u * v2.w, v3.u * v3.w);
|
||||
gradientX.V = FindGradientX(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
|
||||
gradientY.V = FindGradientY(v1.x, v1.y, v2.x, v2.y, v3.x, v3.y, v1.v * v1.w, v2.v * v2.w, v3.v * v3.w);
|
||||
start.W = v1.w + gradientX.W * (startX - v1.x) + gradientY.W * (startY - v1.y);
|
||||
start.U = v1.u * v1.w + gradientX.U * (startX - v1.x) + gradientY.U * (startY - v1.y);
|
||||
start.V = v1.v * v1.w + gradientX.V * (startX - v1.x) + gradientY.V * (startY - v1.y);
|
||||
blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
|
||||
blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
|
||||
blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
|
||||
gradientX.W *= 8.0f;
|
||||
gradientX.U *= 8.0f;
|
||||
gradientX.V *= 8.0f;
|
||||
|
||||
// Output
|
||||
uint8_t * RESTRICT destOrg = args->dest;
|
||||
int pitch = args->pitch;
|
||||
uint8_t *dest = destOrg + destX + destY * pitch;
|
||||
|
||||
// Light
|
||||
uint32_t light = args->uniforms->Light();
|
||||
|
@ -260,57 +257,42 @@ public:
|
|||
uint32_t texWidth = args->uniforms->TextureWidth();
|
||||
uint32_t texHeight = args->uniforms->TextureHeight();
|
||||
|
||||
for (int i = 0; i < numSpans; i++)
|
||||
if (mask0 == 0xffffffff && mask1 == 0xffffffff)
|
||||
{
|
||||
const auto &span = fullSpans[i];
|
||||
|
||||
uint8_t *dest = destOrg + span.X + span.Y * pitch;
|
||||
int width = span.Length;
|
||||
int height = 8;
|
||||
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
blockPosY.W = start.W + gradientX.W * (span.X - startX) + gradientY.W * (span.Y - startY);
|
||||
blockPosY.U = start.U + gradientX.U * (span.X - startX) + gradientY.U * (span.Y - startY);
|
||||
blockPosY.V = start.V + gradientX.V * (span.X - startX) + gradientY.V * (span.Y - startY);
|
||||
|
||||
for (int y = 0; y < height; y++)
|
||||
for (int y = 0; y < 8; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
for (int x = 0; x < width; x++)
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
|
||||
int32_t stepU = (nextU - posU) / 8;
|
||||
int32_t stepV = (nextV - posV) / 8;
|
||||
|
||||
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
for (int ix = 0; ix < 8; ix++)
|
||||
{
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
|
||||
int32_t stepU = (nextU - posU) / 8;
|
||||
int32_t stepV = (nextV - posV) / 8;
|
||||
|
||||
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
for (int ix = 0; ix < 8; ix++)
|
||||
{
|
||||
int lightshade = lightpos >> 8;
|
||||
uint8_t bgcolor = dest[x * 8 + ix];
|
||||
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
|
||||
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
dest[x * 8 + ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
int lightshade = lightpos >> 8;
|
||||
uint8_t bgcolor = dest[ix];
|
||||
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
|
||||
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
|
||||
dest[ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
|
||||
blockPosY.W += gradientY.W;
|
||||
|
@ -320,35 +302,22 @@ public:
|
|||
dest += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < numBlocks; i++)
|
||||
else
|
||||
{
|
||||
const auto &block = partialBlocks[i];
|
||||
|
||||
ScreenTriangleStepVariables blockPosY;
|
||||
blockPosY.W = start.W + gradientX.W * (block.X - startX) + gradientY.W * (block.Y - startY);
|
||||
blockPosY.U = start.U + gradientX.U * (block.X - startX) + gradientY.U * (block.Y - startY);
|
||||
blockPosY.V = start.V + gradientX.V * (block.X - startX) + gradientY.V * (block.Y - startY);
|
||||
|
||||
uint8_t *dest = destOrg + block.X + block.Y * pitch;
|
||||
uint32_t mask0 = block.Mask0;
|
||||
uint32_t mask1 = block.Mask1;
|
||||
|
||||
// mask0 loop:
|
||||
for (int y = 0; y < 4; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
|
@ -388,18 +357,17 @@ public:
|
|||
// mask1 loop:
|
||||
for (int y = 0; y < 4; y++)
|
||||
{
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
|
||||
float rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t posU = (int32_t)(blockPosX.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosX.V * rcpW);
|
||||
float rcpW = 0x01000000 / blockPosY.W;
|
||||
int32_t posU = (int32_t)(blockPosY.U * rcpW);
|
||||
int32_t posV = (int32_t)(blockPosY.V * rcpW);
|
||||
|
||||
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
blockPosX.W += gradientX.W * 8;
|
||||
blockPosX.U += gradientX.U * 8;
|
||||
blockPosX.V += gradientX.V * 8;
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
blockPosX.U += gradientX.U;
|
||||
blockPosX.V += gradientX.V;
|
||||
|
||||
rcpW = 0x01000000 / blockPosX.W;
|
||||
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
|
||||
|
|
|
@ -38,8 +38,6 @@
|
|||
#include "swrenderer/drawers/r_draw_rgba.h"
|
||||
#include "screen_triangle.h"
|
||||
|
||||
CVAR(Bool, r_debug_trisetup, false, 0);
|
||||
|
||||
int PolyTriangleDrawer::viewport_x;
|
||||
int PolyTriangleDrawer::viewport_y;
|
||||
int PolyTriangleDrawer::viewport_width;
|
||||
|
@ -90,33 +88,13 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
|
|||
if (drawargs.VertexCount() < 3)
|
||||
return;
|
||||
|
||||
PolyDrawFuncPtr drawfuncs[4];
|
||||
int num_drawfuncs = 0;
|
||||
|
||||
drawfuncs[num_drawfuncs++] = drawargs.SubsectorTest() ? &ScreenTriangle::SetupSubsector : &ScreenTriangle::SetupNormal;
|
||||
|
||||
if (!r_debug_trisetup) // For profiling how much time is spent in setup vs drawing
|
||||
{
|
||||
int bmode = (int)drawargs.BlendMode();
|
||||
|
||||
if (drawargs.WriteColor())
|
||||
drawfuncs[num_drawfuncs++] = dest_bgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode];
|
||||
}
|
||||
|
||||
if (drawargs.WriteStencil())
|
||||
drawfuncs[num_drawfuncs++] = &ScreenTriangle::StencilWrite;
|
||||
|
||||
if (drawargs.WriteSubsector())
|
||||
drawfuncs[num_drawfuncs++] = &ScreenTriangle::SubsectorWrite;
|
||||
|
||||
TriDrawTriangleArgs args;
|
||||
args.dest = dest;
|
||||
args.pitch = dest_pitch;
|
||||
args.clipleft = 0;
|
||||
args.clipright = dest_width;
|
||||
args.cliptop = 0;
|
||||
args.clipbottom = dest_height;
|
||||
args.uniforms = &drawargs;
|
||||
args.destBgra = dest_bgra;
|
||||
args.stencilPitch = PolyStencilBuffer::Instance()->BlockWidth();
|
||||
args.stencilValues = PolyStencilBuffer::Instance()->Values();
|
||||
args.stencilMasks = PolyStencilBuffer::Instance()->Masks();
|
||||
|
@ -133,7 +111,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
|
|||
{
|
||||
for (int j = 0; j < 3; j++)
|
||||
vert[j] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
|
||||
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs);
|
||||
draw_shaded_triangle(vert, ccw, &args, thread);
|
||||
}
|
||||
}
|
||||
else if (drawargs.DrawMode() == PolyDrawMode::TriangleFan)
|
||||
|
@ -143,7 +121,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
|
|||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
|
||||
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs);
|
||||
draw_shaded_triangle(vert, ccw, &args, thread);
|
||||
vert[1] = vert[2];
|
||||
}
|
||||
}
|
||||
|
@ -154,7 +132,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, WorkerThreadD
|
|||
for (int i = 2; i < vcount; i++)
|
||||
{
|
||||
vert[2] = shade_vertex(*drawargs.ObjectToClip(), drawargs.ClipPlane(), *(vinput++));
|
||||
draw_shaded_triangle(vert, ccw, &args, thread, drawfuncs, num_drawfuncs);
|
||||
draw_shaded_triangle(vert, ccw, &args, thread);
|
||||
vert[0] = vert[1];
|
||||
vert[1] = vert[2];
|
||||
ccw = !ccw;
|
||||
|
@ -173,7 +151,7 @@ ShadedTriVertex PolyTriangleDrawer::shade_vertex(const TriMatrix &objectToClip,
|
|||
return sv;
|
||||
}
|
||||
|
||||
void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs)
|
||||
void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread)
|
||||
{
|
||||
// Cull, clip and generate additional vertices as needed
|
||||
TriVertex clippedvert[max_additional_vertices];
|
||||
|
@ -249,9 +227,8 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
|
|||
args->v1 = &clippedvert[numclipvert - 1];
|
||||
args->v2 = &clippedvert[i - 1];
|
||||
args->v3 = &clippedvert[i - 2];
|
||||
|
||||
for (int j = 0; j < num_drawfuncs; j++)
|
||||
drawfuncs[j](args, thread);
|
||||
|
||||
ScreenTriangle::Draw(args, thread);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -262,8 +239,7 @@ void PolyTriangleDrawer::draw_shaded_triangle(const ShadedTriVertex *vert, bool
|
|||
args->v2 = &clippedvert[i - 1];
|
||||
args->v3 = &clippedvert[i];
|
||||
|
||||
for (int j = 0; j < num_drawfuncs; j++)
|
||||
drawfuncs[j](args, thread);
|
||||
ScreenTriangle::Draw(args, thread);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -444,8 +420,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread)
|
|||
WorkerThreadData thread_data;
|
||||
thread_data.core = thread->core;
|
||||
thread_data.num_cores = thread->num_cores;
|
||||
thread_data.FullSpans = thread->FullSpansBuffer.data();
|
||||
thread_data.PartialBlocks = thread->PartialBlocksBuffer.data();
|
||||
|
||||
PolyTriangleDrawer::draw_arrays(args, &thread_data);
|
||||
}
|
||||
|
|
|
@ -46,7 +46,7 @@ public:
|
|||
private:
|
||||
static ShadedTriVertex shade_vertex(const TriMatrix &objectToClip, const float *clipPlane, const TriVertex &v);
|
||||
static void draw_arrays(const PolyDrawArgs &args, WorkerThreadData *thread);
|
||||
static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread, PolyDrawFuncPtr *drawfuncs, int num_drawfuncs);
|
||||
static void draw_shaded_triangle(const ShadedTriVertex *vertices, bool ccw, TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
|
||||
static int clipedge(const ShadedTriVertex *verts, TriVertex *clippedvert);
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -28,34 +28,10 @@
|
|||
class FString;
|
||||
class PolyDrawArgs;
|
||||
|
||||
struct TriFullSpan
|
||||
{
|
||||
uint16_t X;
|
||||
uint16_t Y;
|
||||
uint32_t Length;
|
||||
};
|
||||
|
||||
struct TriPartialBlock
|
||||
{
|
||||
uint16_t X;
|
||||
uint16_t Y;
|
||||
uint32_t Mask0;
|
||||
uint32_t Mask1;
|
||||
};
|
||||
|
||||
struct WorkerThreadData
|
||||
{
|
||||
int32_t core;
|
||||
int32_t num_cores;
|
||||
uint32_t *temp;
|
||||
|
||||
// Triangle working data:
|
||||
TriFullSpan *FullSpans;
|
||||
TriPartialBlock *PartialBlocks;
|
||||
uint32_t NumFullSpans;
|
||||
uint32_t NumPartialBlocks;
|
||||
int32_t StartX;
|
||||
int32_t StartY;
|
||||
};
|
||||
|
||||
struct TriVertex
|
||||
|
@ -74,15 +50,14 @@ struct TriDrawTriangleArgs
|
|||
TriVertex *v1;
|
||||
TriVertex *v2;
|
||||
TriVertex *v3;
|
||||
int32_t clipleft;
|
||||
int32_t clipright;
|
||||
int32_t cliptop;
|
||||
int32_t clipbottom;
|
||||
uint8_t *stencilValues;
|
||||
uint32_t *stencilMasks;
|
||||
int32_t stencilPitch;
|
||||
uint32_t *subsectorGBuffer;
|
||||
const PolyDrawArgs *uniforms;
|
||||
bool destBgra;
|
||||
};
|
||||
|
||||
class RectDrawArgs;
|
||||
|
@ -116,13 +91,10 @@ enum class TriBlendMode
|
|||
class ScreenTriangle
|
||||
{
|
||||
public:
|
||||
static void SetupNormal(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
static void SetupSubsector(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
static void Draw(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
|
||||
|
||||
static void(*TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *);
|
||||
static void(*TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *);
|
||||
static void(*TriDrawers8[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
|
||||
static void(*TriDrawers32[])(int, int, uint32_t, uint32_t, const TriDrawTriangleArgs *);
|
||||
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
||||
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
|
||||
};
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#pragma once
|
||||
|
||||
#include "r_draw.h"
|
||||
#include "polyrenderer/drawers/screen_triangle.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <thread>
|
||||
|
@ -37,12 +36,6 @@ EXTERN_CVAR(Bool, r_multithreaded)
|
|||
class DrawerThread
|
||||
{
|
||||
public:
|
||||
DrawerThread()
|
||||
{
|
||||
FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
|
||||
PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8));
|
||||
}
|
||||
|
||||
std::thread thread;
|
||||
size_t current_queue = 0;
|
||||
|
||||
|
@ -55,10 +48,6 @@ public:
|
|||
// Working buffer used by the tilted (sloped) span drawer
|
||||
const uint8_t *tiltlighting[MAXWIDTH];
|
||||
|
||||
// Working buffer used by the triangle drawer
|
||||
std::vector<TriFullSpan> FullSpansBuffer;
|
||||
std::vector<TriPartialBlock> PartialBlocksBuffer;
|
||||
|
||||
// Checks if a line is rendered by this thread
|
||||
bool line_skipped_by_thread(int line)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue