mirror of
https://github.com/ZDoom/gzdoom.git
synced 2025-02-18 10:11:11 +00:00
- Move depth testing out of span drawers
This commit is contained in:
parent
01fd404133
commit
fae514923a
1 changed files with 167 additions and 165 deletions
|
@ -1179,21 +1179,88 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, WorkerThreadD
|
|||
|
||||
// Draw the triangle:
|
||||
|
||||
if (args->destBgra)
|
||||
{
|
||||
auto drawfunc = (args->destBgra) ? DrawSpan32 : DrawSpan8;
|
||||
|
||||
float stepXW = args->gradientX.W;
|
||||
float v1X = args->v1->x;
|
||||
float v1Y = args->v1->y;
|
||||
float v1W = args->v1->w;
|
||||
|
||||
int num_cores = thread->num_cores;
|
||||
for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores)
|
||||
{
|
||||
DrawSpan32(y, leftEdge[y], rightEdge[y], args);
|
||||
}
|
||||
}
|
||||
else
|
||||
int x = leftEdge[y];
|
||||
int xend = rightEdge[y];
|
||||
|
||||
float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y;
|
||||
|
||||
float startX = x + (0.5f - v1X);
|
||||
float startY = y + (0.5f - v1Y);
|
||||
float posXW = v1W + stepXW * startX + args->gradientY.W * startY;
|
||||
|
||||
#ifndef NO_SSE
|
||||
__m128 mstepXW = _mm_set1_ps(stepXW * 4.0f);
|
||||
__m128 mfirstStepXW = _mm_setr_ps(0.0f, stepXW, stepXW + stepXW, stepXW + stepXW + stepXW);
|
||||
while (x < xend)
|
||||
{
|
||||
int num_cores = thread->num_cores;
|
||||
for (int y = topY + thread->skipped_by_thread(topY); y < bottomY; y += num_cores)
|
||||
int xstart = x;
|
||||
|
||||
int xendsse = x + ((xend - x) & ~3);
|
||||
__m128 mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW);
|
||||
while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 15 && x < xendsse)
|
||||
{
|
||||
DrawSpan8(y, leftEdge[y], rightEdge[y], args);
|
||||
_mm_storeu_ps(zbufferLine + x, mposXW);
|
||||
mposXW = _mm_add_ps(mposXW, mstepXW);
|
||||
x += 4;
|
||||
}
|
||||
posXW = _mm_cvtss_f32(mposXW);
|
||||
|
||||
while (zbufferLine[x] <= posXW && x < xend)
|
||||
{
|
||||
zbufferLine[x] = posXW;
|
||||
posXW += stepXW;
|
||||
x++;
|
||||
}
|
||||
|
||||
if (x > xstart)
|
||||
drawfunc(y, xstart, x, args);
|
||||
|
||||
xendsse = x + ((xend - x) & ~3);
|
||||
mposXW = _mm_add_ps(_mm_set1_ps(posXW), mfirstStepXW);
|
||||
while (_mm_movemask_ps(_mm_cmple_ps(_mm_loadu_ps(zbufferLine + x), mposXW)) == 0 && x < xendsse)
|
||||
{
|
||||
mposXW = _mm_add_ps(mposXW, mstepXW);
|
||||
x += 4;
|
||||
}
|
||||
posXW = _mm_cvtss_f32(mposXW);
|
||||
|
||||
while (zbufferLine[x] > posXW && x < xend)
|
||||
{
|
||||
posXW += stepXW;
|
||||
x++;
|
||||
}
|
||||
}
|
||||
#else
|
||||
while (x < xend)
|
||||
{
|
||||
int xstart = x;
|
||||
while (zbufferLine[x] <= posXW && x < xend)
|
||||
{
|
||||
zbufferLine[x] = posXW;
|
||||
posXW += stepXW;
|
||||
x++;
|
||||
}
|
||||
|
||||
if (x > xstart)
|
||||
drawfunc(y, xstart, x, args);
|
||||
|
||||
while (zbufferLine[x] > posXW && x < xend)
|
||||
{
|
||||
posXW += stepXW;
|
||||
x++;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1226,20 +1293,13 @@ void ScreenTriangle::DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
|
||||
light += light >> 7; // 255 -> 256
|
||||
|
||||
float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y;
|
||||
|
||||
uint32_t *dest = (uint32_t*)args->dest;
|
||||
uint32_t *destLine = dest + args->pitch * y;
|
||||
|
||||
int x = x0;
|
||||
int sseEnd = x0 + (x1 - x0) / 2 * 2;
|
||||
int sseEnd = x0 + ((x1 - x0) & ~3);
|
||||
while (x < sseEnd)
|
||||
{
|
||||
if (zbufferLine[x] <= posXW && zbufferLine[x + 1] <= posXW + stepXW)
|
||||
{
|
||||
zbufferLine[x] = posXW;
|
||||
zbufferLine[x + 1] = posXW + stepXW;
|
||||
|
||||
uint32_t fgcolor[2];
|
||||
int32_t lightshade[2];
|
||||
|
||||
|
@ -1288,51 +1348,8 @@ void ScreenTriangle::DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
|
||||
x += 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < 2; i++)
|
||||
{
|
||||
if (zbufferLine[x] <= posXW)
|
||||
{
|
||||
float rcpW = 0x01000000 / posXW;
|
||||
int32_t u = (int32_t)(posXU * rcpW);
|
||||
int32_t v = (int32_t)(posXV * rcpW);
|
||||
|
||||
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||
uint32_t fgcolor = texPixels[texelX * texHeight + texelY];
|
||||
|
||||
uint32_t fgcolor_r = RPART(fgcolor);
|
||||
uint32_t fgcolor_g = GPART(fgcolor);
|
||||
uint32_t fgcolor_b = BPART(fgcolor);
|
||||
uint32_t fgcolor_a = APART(fgcolor);
|
||||
if (fgcolor_a > 127)
|
||||
{
|
||||
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
|
||||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
int lightshade = lightpos >> 8;
|
||||
|
||||
fgcolor_r = (fgcolor_r * lightshade) >> 8;
|
||||
fgcolor_g = (fgcolor_g * lightshade) >> 8;
|
||||
fgcolor_b = (fgcolor_b * lightshade) >> 8;
|
||||
|
||||
destLine[x] = 0xff000000 | (fgcolor_r << 16) | (fgcolor_g << 8) | fgcolor_b;
|
||||
}
|
||||
|
||||
zbufferLine[x] = posXW;
|
||||
}
|
||||
|
||||
posXW += stepXW;
|
||||
posXU += stepXU;
|
||||
posXV += stepXV;
|
||||
x++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while (x < x1)
|
||||
{
|
||||
if (zbufferLine[x] <= posXW)
|
||||
{
|
||||
float rcpW = 0x01000000 / posXW;
|
||||
int32_t u = (int32_t)(posXU * rcpW);
|
||||
|
@ -1359,9 +1376,6 @@ void ScreenTriangle::DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
destLine[x] = 0xff000000 | (fgcolor_r << 16) | (fgcolor_g << 8) | fgcolor_b;
|
||||
}
|
||||
|
||||
zbufferLine[x] = posXW;
|
||||
}
|
||||
|
||||
posXW += stepXW;
|
||||
posXU += stepXU;
|
||||
posXV += stepXV;
|
||||
|
@ -1398,15 +1412,11 @@ void ScreenTriangle::DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
|
||||
light += light >> 7; // 255 -> 256
|
||||
|
||||
float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y;
|
||||
|
||||
uint32_t *dest = (uint32_t*)args->dest;
|
||||
uint32_t *destLine = dest + args->pitch * y;
|
||||
|
||||
int x = x0;
|
||||
while (x < x1)
|
||||
{
|
||||
if (zbufferLine[x] <= posXW)
|
||||
{
|
||||
float rcpW = 0x01000000 / posXW;
|
||||
int32_t u = (int32_t)(posXU * rcpW);
|
||||
|
@ -1433,9 +1443,6 @@ void ScreenTriangle::DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
destLine[x] = 0xff000000 | (fgcolor_r << 16) | (fgcolor_g << 8) | fgcolor_b;
|
||||
}
|
||||
|
||||
zbufferLine[x] = posXW;
|
||||
}
|
||||
|
||||
posXW += stepXW;
|
||||
posXU += stepXU;
|
||||
posXV += stepXV;
|
||||
|
@ -1474,15 +1481,11 @@ void ScreenTriangle::DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
|
||||
light += light >> 7; // 255 -> 256
|
||||
|
||||
float *zbufferLine = args->zbuffer + args->stencilPitch * 8 * y;
|
||||
|
||||
uint8_t *dest = (uint8_t*)args->dest;
|
||||
uint8_t *destLine = dest + args->pitch * y;
|
||||
|
||||
int x = x0;
|
||||
while (x < x1)
|
||||
{
|
||||
if (zbufferLine[x] <= posXW)
|
||||
{
|
||||
float rcpW = 0x01000000 / posXW;
|
||||
int32_t u = (int32_t)(posXU * rcpW);
|
||||
|
@ -1501,7 +1504,6 @@ void ScreenTriangle::DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs
|
|||
|
||||
if (fgcolor != 0)
|
||||
destLine[x] = shadedfg;
|
||||
}
|
||||
|
||||
posXW += stepXW;
|
||||
posXU += stepXU;
|
||||
|
|
Loading…
Reference in a new issue