Implement the other light modes and fix dynamic lights being calculated twice(!)

This commit is contained in:
Magnus Norddahl 2019-12-15 20:05:13 +01:00
parent b44c40a63d
commit c5717d98db
4 changed files with 239 additions and 131 deletions

View file

@ -111,8 +111,7 @@ public:
uint8_t vColorB[MAXWIDTH];
float GradientdistZ[MAXWIDTH];
uint32_t FragColor[MAXWIDTH];
uint16_t lightarray[MAXWIDTH];
uint32_t dynlights[MAXWIDTH];
uint32_t lightarray[MAXWIDTH];
uint8_t discard[MAXWIDTH];
} scanline;

View file

@ -35,7 +35,7 @@ static const int shiftTable[] = {
#if 1 //#ifndef USE_AVX2
template<typename OptT>
static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
FRenderStyle style = thread->RenderStyle;
@ -104,7 +104,7 @@ static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
}
#else
template<typename OptT>
static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
FRenderStyle style = thread->RenderStyle;
@ -177,7 +177,7 @@ static void BlendColor(int y, int x0, int x1, PolyTriangleThreadData* thread)
#endif
#ifdef NO_SSE
static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* dest = (uint32_t*)thread->dest;
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
@ -186,7 +186,7 @@ static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thre
memcpy(line + x0, fragcolor + x0, (x1 - x0) * sizeof(uint32_t));
}
#else
static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* dest = (uint32_t*)thread->dest;
uint32_t* line = dest + y * (ptrdiff_t)thread->dest_pitch;
@ -208,7 +208,7 @@ static void BlendColorOpaque(int y, int x0, int x1, PolyTriangleThreadData* thre
}
#endif
static void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -250,7 +250,7 @@ static void BlendColorAdd_Src_InvSrc(int y, int x0, int x1, PolyTriangleThreadDa
}
}
static void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -301,7 +301,7 @@ static void BlendColorAdd_SrcCol_InvSrcCol(int y, int x0, int x1, PolyTriangleTh
}
}
static void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -341,7 +341,7 @@ static void BlendColorAdd_Src_One(int y, int x0, int x1, PolyTriangleThreadData*
}
}
static void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -387,7 +387,7 @@ static void BlendColorAdd_SrcCol_One(int y, int x0, int x1, PolyTriangleThreadDa
}
}
static void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -433,7 +433,7 @@ static void BlendColorAdd_DstCol_Zero(int y, int x0, int x1, PolyTriangleThreadD
}
}
static void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;
@ -479,7 +479,7 @@ static void BlendColorAdd_InvDstCol_Zero(int y, int x0, int x1, PolyTriangleThre
}
}
static void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
void BlendColorRevSub_Src_One(int y, int x0, int x1, PolyTriangleThreadData* thread)
{
uint32_t* line = (uint32_t*)thread->dest + y * (ptrdiff_t)thread->dest_pitch;
uint32_t* fragcolor = thread->scanline.FragColor;

View file

@ -86,7 +86,7 @@ static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
float worldnormalY = thread->mainVertexShader.vWorldNormal.Y;
float worldnormalZ = thread->mainVertexShader.vWorldNormal.Z;
uint32_t* dynlights = thread->scanline.dynlights;
uint32_t* lightarray = thread->scanline.lightarray;
float* worldposX = thread->scanline.WorldX;
float* worldposY = thread->scanline.WorldY;
float* worldposZ = thread->scanline.WorldZ;
@ -103,9 +103,9 @@ static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
for (int x = x0; x < sseend; x += 4)
{
__m128i litlo = _mm_setzero_si128();
//__m128i litlo = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
__m128i lithi = litlo;
__m128i lit = _mm_loadu_si128((__m128i*)&lightarray[x]);
__m128i litlo = _mm_unpacklo_epi8(lit, _mm_setzero_si128());
__m128i lithi = _mm_unpackhi_epi8(lit, _mm_setzero_si128());
for (int i = 0; i < num_lights; i++)
{
@ -150,15 +150,16 @@ static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
lithi = _mm_add_epi16(lithi, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenhi), 8));
}
_mm_storeu_si128((__m128i*)&dynlights[x], _mm_packus_epi16(litlo, lithi));
_mm_storeu_si128((__m128i*)&lightarray[x], _mm_packus_epi16(litlo, lithi));
}
#endif
for (int x = x0; x < x1; x++)
for (int x = sseend; x < x1; x++)
{
uint32_t lit_r = 0;
uint32_t lit_g = 0;
uint32_t lit_b = 0;
uint32_t lit_a = APART(lightarray[x]);
uint32_t lit_r = RPART(lightarray[x]);
uint32_t lit_g = GPART(lightarray[x]);
uint32_t lit_b = BPART(lightarray[x]);
for (int i = 0; i < num_lights; i++)
{
@ -209,7 +210,7 @@ static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
lit_r = MIN<uint32_t>(lit_r, 255);
lit_g = MIN<uint32_t>(lit_g, 255);
lit_b = MIN<uint32_t>(lit_b, 255);
dynlights[x] = MAKEARGB(255, lit_r, lit_g, lit_b);
lightarray[x] = MAKEARGB(lit_a, lit_r, lit_g, lit_b);
// Palette version:
// dynlights[x] = RGB256k.All[((lit_r >> 2) << 12) | ((lit_g >> 2) << 6) | (lit_b >> 2)];
@ -217,6 +218,15 @@ static void WriteDynLightArray(int x0, int x1, PolyTriangleThreadData* thread)
}
static void WriteLightArray(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyTriangleThreadData* thread)
{
auto constants = thread->PushConstants;
auto vColorR = thread->scanline.vColorR;
auto vColorG = thread->scanline.vColorG;
auto vColorB = thread->scanline.vColorB;
auto vColorA = thread->scanline.vColorA;
if (thread->PushConstants->uLightLevel >= 0.0f)
{
float startX = x0 + (0.5f - args->v1->x);
float startY = y + (0.5f - args->v1->y);
@ -225,7 +235,7 @@ static void WriteLightArray(int y, int x0, int x1, const TriDrawTriangleArgs* ar
float globVis = thread->mainVertexShader.Viewpoint->mGlobVis;
uint32_t light = (int)(thread->PushConstants->uLightLevel * 255.0f);
uint32_t light = (int)(constants->uLightLevel * 255.0f);
fixed_t shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
fixed_t lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
fixed_t lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
@ -233,49 +243,86 @@ static void WriteLightArray(int y, int x0, int x1, const TriDrawTriangleArgs* ar
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
uint16_t *lightarray = thread->scanline.lightarray;
fixed_t lightend = lightpos + lightstep * (x1 - x0);
if (lightpos < maxvis && shade >= lightpos && shade - lightpos <= maxlight &&
lightend < maxvis && shade >= lightend && shade - lightend <= maxlight)
{
//if (BitsPerPixel == 32)
{
lightpos += FRACUNIT - shade;
uint32_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
lightarray[x] = lightpos >> 8;
uint32_t l = MIN(lightpos >> 8, 256);
uint32_t r = vColorR[x];
uint32_t g = vColorG[x];
uint32_t b = vColorB[x];
uint32_t a = vColorA[x];
lightarray[x] = MAKEARGB(a, (r * l) >> 8, (g * l) >> 8, (b * l) >> 8);
lightpos += lightstep;
}
}
/*else
{
lightpos = shade - lightpos;
for (int x = x0; x < x1; x++)
{
lightarray[x] = (lightpos >> 3) & 0xffffff00;
lightpos -= lightstep;
}
}*/
}
else
{
//if (BitsPerPixel == 32)
{
uint32_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
uint32_t l = MIN((FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8, 256);
uint32_t r = vColorR[x];
uint32_t g = vColorG[x];
uint32_t b = vColorB[x];
uint32_t a = vColorA[x];
lightarray[x] = MAKEARGB(a, (r * l) >> 8, (g * l) >> 8, (b * l) >> 8);
lightpos += lightstep;
}
}
/*else
}
else if (constants->uFogEnabled > 0)
{
float uLightDist = constants->uLightDist;
float uLightFactor = constants->uLightFactor;
float* w = thread->scanline.W;
uint32_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 3) & 0xffffff00;
lightpos += lightstep;
uint32_t a = thread->scanline.vColorA[x];
uint32_t r = thread->scanline.vColorR[x];
uint32_t g = thread->scanline.vColorG[x];
uint32_t b = thread->scanline.vColorB[x];
float fogdist = MAX(16.0f, w[x]);
float fogfactor = std::exp2(constants->uFogDensity * fogdist);
// brightening around the player for light mode 2:
if (fogdist < uLightDist)
{
uint32_t l = (int)((uLightFactor - (fogdist / uLightDist) * (uLightFactor - 1.0)) * 256.0f);
r = (r * l) >> 8;
g = (g * l) >> 8;
b = (b * l) >> 8;
}
// apply light diminishing through fog equation: mix(vec3(0.0, 0.0, 0.0), lightshade.rgb, fogfactor)
uint32_t t = (int)(fogfactor * 256.0f);
r = (r * t) >> 8;
g = (g * t) >> 8;
b = (b * t) >> 8;
lightarray[x] = MAKEARGB(a, r, g, b);
}
}
else
{
uint32_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
uint32_t a = thread->scanline.vColorA[x];
uint32_t r = thread->scanline.vColorR[x];
uint32_t g = thread->scanline.vColorG[x];
uint32_t b = thread->scanline.vColorB[x];
lightarray[x] = MAKEARGB(a, r, g, b);
}
}*/
}
}
@ -412,7 +459,7 @@ void WriteVaryings(int y, int x0, int x1, const TriDrawTriangleArgs* args, PolyT
WriteVaryingColor(args->v1->g * args->v1->w + args->gradientX.G * startX + args->gradientY.G * startY, args->gradientX.G, x0, x1, thread->scanline.W, thread->scanline.vColorG);
WriteVaryingColor(args->v1->b * args->v1->w + args->gradientX.B * startX + args->gradientY.B * startY, args->gradientX.B, x0, x1, thread->scanline.W, thread->scanline.vColorB);
if (thread->PushConstants->uLightLevel >= 0.0f)
if (thread->PushConstants->uFogEnabled != -3 && thread->PushConstants->uTextureMode != TM_FOGLAYER)
WriteLightArray(y, x0, x1, args, thread);
if (thread->numPolyLights > 0)

View file

@ -368,31 +368,7 @@ static void RunAlphaTest(int x0, int x1, PolyTriangleThreadData* thread)
}
}
// Modulates every fragment in [x0, x1) of scanline.FragColor by the
// per-pixel vertex color stored in scanline.vColorA/R/G/B, one channel at a
// time, and writes the result back in place.
static void ApplyVertexColor(int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* pixels = thread->scanline.FragColor;
	auto vertexR = thread->scanline.vColorR;
	auto vertexG = thread->scanline.vColorG;
	auto vertexB = thread->scanline.vColorB;
	auto vertexA = thread->scanline.vColorA;

	for (int px = x0; px < x1; ++px)
	{
		uint32_t scaleR = vertexR[px];
		uint32_t scaleG = vertexG[px];
		uint32_t scaleB = vertexB[px];
		uint32_t scaleA = vertexA[px];

		// Adding the top bit maps 255 to 256, so a full-intensity vertex
		// channel leaves the texel channel unchanged after the >> 8 below.
		scaleA += scaleA >> 7;
		scaleR += scaleR >> 7;
		scaleG += scaleG >> 7;
		scaleB += scaleB >> 7;

		const uint32_t src = pixels[px];

		// The +127 term biases the 8.8 fixed-point product before the shift.
		const uint32_t outA = (APART(src) * scaleA + 127) >> 8;
		const uint32_t outR = (RPART(src) * scaleR + 127) >> 8;
		const uint32_t outG = (GPART(src) * scaleG + 127) >> 8;
		const uint32_t outB = (BPART(src) * scaleB + 127) >> 8;

		pixels[px] = MAKEARGB(outA, outR, outG, outB);
	}
}
static void MainFP(int x0, int x1, PolyTriangleThreadData* thread)
static void ProcessMaterial(int x0, int x1, PolyTriangleThreadData* thread)
{
if (thread->EffectState == SHADER_Paletted) // func_paletted
{
@ -446,65 +422,151 @@ static void MainFP(int x0, int x1, PolyTriangleThreadData* thread)
}
}
}
}
// Multiplies every fragment in [x0, x1) of scanline.FragColor by the matching
// ARGB entry of scanline.lightarray. When PushConstants->uFogEnabled is not
// >= 0, the lit RGB is additionally blended towards uFogColor using a factor
// of exp2(uFogDensity * max(16, W[x])); alpha is never fogged.
static void GetLightColor(int x0, int x1, PolyTriangleThreadData* thread)
{
	uint32_t* pixels = thread->scanline.FragColor;
	uint32_t* lights = thread->scanline.lightarray;

	const bool plainLighting = thread->PushConstants->uFogEnabled >= 0;

	if (!plainLighting)
	{
		// Fog color converted once per span from float to 0..255 integers.
		const uint32_t fogRed = (int)((thread->mainVertexShader.Data.uFogColor.r) * 255.0f);
		const uint32_t fogGreen = (int)((thread->mainVertexShader.Data.uFogColor.g) * 255.0f);
		const uint32_t fogBlue = (int)((thread->mainVertexShader.Data.uFogColor.b) * 255.0f);

		const float density = thread->PushConstants->uFogDensity;
		float* depthW = thread->scanline.W;

		for (int px = x0; px < x1; ++px)
		{
			const uint32_t src = pixels[px];
			const uint32_t shade = lights[px];

			uint32_t scaleA = APART(shade);
			uint32_t scaleR = RPART(shade);
			uint32_t scaleG = GPART(shade);
			uint32_t scaleB = BPART(shade);

			// Adding the top bit maps 255 to 256 so full light is an exact identity.
			scaleA += scaleA >> 7;
			scaleR += scaleR >> 7;
			scaleG += scaleG >> 7;
			scaleB += scaleB >> 7;

			// Distances below 16 are treated as 16.
			float fogdist = MAX(16.0f, depthW[px]);
			float fogfactor = std::exp2(density * fogdist);

			const uint32_t outA = (APART(src) * scaleA + 127) >> 8;
			uint32_t outR = (RPART(src) * scaleR + 127) >> 8;
			uint32_t outG = (GPART(src) * scaleG + 127) >> 8;
			uint32_t outB = (BPART(src) * scaleB + 127) >> 8;

			// Linear blend: weight is the lit color's share, 256 - weight the fog's.
			// NOTE(review): presumably uFogDensity is <= 0 so weight stays in
			// 0..256; otherwise fogShare wraps around - confirm with the caller.
			const uint32_t weight = (int)(fogfactor * 256.0f);
			const uint32_t fogShare = 256 - weight;
			outR = (fogRed * fogShare + outR * weight + 127) >> 8;
			outG = (fogGreen * fogShare + outG * weight + 127) >> 8;
			outB = (fogBlue * fogShare + outB * weight + 127) >> 8;

			pixels[px] = MAKEARGB(outA, outR, outG, outB);
		}
		return;
	}

	// No fog color blend: the result is simply fragment * light per channel.
	for (int px = x0; px < x1; ++px)
	{
		const uint32_t src = pixels[px];
		const uint32_t shade = lights[px];

		uint32_t scaleA = APART(shade);
		uint32_t scaleR = RPART(shade);
		uint32_t scaleG = GPART(shade);
		uint32_t scaleB = BPART(shade);

		// Adding the top bit maps 255 to 256 so full light is an exact identity.
		scaleA += scaleA >> 7;
		scaleR += scaleR >> 7;
		scaleG += scaleG >> 7;
		scaleB += scaleB >> 7;

		const uint32_t outA = (APART(src) * scaleA + 127) >> 8;
		const uint32_t outR = (RPART(src) * scaleR + 127) >> 8;
		const uint32_t outG = (GPART(src) * scaleG + 127) >> 8;
		const uint32_t outB = (BPART(src) * scaleB + 127) >> 8;

		pixels[px] = MAKEARGB(outA, outR, outG, outB);
	}
}
static void MainFP(int x0, int x1, PolyTriangleThreadData* thread)
{
ProcessMaterial(x0, x1, thread);
if (thread->AlphaTest)
RunAlphaTest(x0, x1, thread);
ApplyVertexColor(x0, x1, thread);
auto constants = thread->PushConstants;
if (constants->uFogEnabled != -3)
{
if (constants->uTextureMode != TM_FOGLAYER)
{
GetLightColor(x0, x1, thread);
}
else
{
/*float fogdist = 0.0f;
float fogfactor = 0.0f;
if (constants->uFogEnabled != 0)
{
fogdist = MAX(16.0f, w[x]);
fogfactor = std::exp2(constants->uFogDensity * fogdist);
}
frag = vec4(uFogColor.rgb, (1.0 - fogfactor) * frag.a * 0.75 * vColor.a);*/
}
}
else // simple 2D (uses the fog color to add a color overlay)
{
uint32_t fogR = (int)((thread->mainVertexShader.Data.uFogColor.r) * 255.0f);
uint32_t fogG = (int)((thread->mainVertexShader.Data.uFogColor.g) * 255.0f);
uint32_t fogB = (int)((thread->mainVertexShader.Data.uFogColor.b) * 255.0f);
auto vColorR = thread->scanline.vColorR;
auto vColorG = thread->scanline.vColorG;
auto vColorB = thread->scanline.vColorB;
auto vColorA = thread->scanline.vColorA;
uint32_t* fragcolor = thread->scanline.FragColor;
if (constants->uLightLevel >= 0.0f && thread->numPolyLights > 0)
if (constants->uTextureMode == TM_FOGLAYER)
{
// float gray = grayscale(frag);
// vec4 cm = (uObjectColor + gray * (uAddColor - uObjectColor)) * 2;
// frag = vec4(clamp(cm.rgb, 0.0, 1.0), frag.a);
// frag = frag * vColor;
// frag.rgb = frag.rgb + uFogColor.rgb;
}
else
{
uint16_t* lightarray = thread->scanline.lightarray;
uint32_t* dynlights = thread->scanline.dynlights;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
int lightshade = lightarray[x];
uint32_t dynlight = dynlights[x];
uint32_t a = vColorA[x];
uint32_t r = vColorR[x];
uint32_t g = vColorG[x];
uint32_t b = vColorB[x];
a += a >> 7;
r += r >> 7;
g += g >> 7;
b += b >> 7;
uint32_t a = APART(fg);
uint32_t r = MIN((RPART(fg) * (lightshade + RPART(dynlight))) >> 8, (uint32_t)255);
uint32_t g = MIN((GPART(fg) * (lightshade + GPART(dynlight))) >> 8, (uint32_t)255);
uint32_t b = MIN((BPART(fg) * (lightshade + BPART(dynlight))) >> 8, (uint32_t)255);
// frag = frag * vColor;
a = (APART(fragcolor[x]) * a + 127) >> 8;
r = (RPART(fragcolor[x]) * r + 127) >> 8;
g = (GPART(fragcolor[x]) * g + 127) >> 8;
b = (BPART(fragcolor[x]) * b + 127) >> 8;
// frag.rgb = frag.rgb + uFogColor.rgb;
r = MIN(r + fogR, (uint32_t)255);
g = MIN(g + fogG, (uint32_t)255);
b = MIN(b + fogB, (uint32_t)255);
fragcolor[x] = MAKEARGB(a, r, g, b);
}
}
else if (constants->uLightLevel >= 0.0f)
{
uint16_t* lightarray = thread->scanline.lightarray;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
int lightshade = lightarray[x];
uint32_t a = APART(fg);
uint32_t r = (RPART(fg) * lightshade) >> 8;
uint32_t g = (GPART(fg) * lightshade) >> 8;
uint32_t b = (BPART(fg) * lightshade) >> 8;
fragcolor[x] = MAKEARGB(a, r, g, b);
}
// To do: apply fog
}
else if (thread->numPolyLights > 0)
{
uint32_t* dynlights = thread->scanline.dynlights;
for (int x = x0; x < x1; x++)
{
uint32_t fg = fragcolor[x];
uint32_t dynlight = dynlights[x];
uint32_t a = APART(fg);
uint32_t r = MIN((RPART(fg) * RPART(dynlight)) >> 8, (uint32_t)255);
uint32_t g = MIN((GPART(fg) * GPART(dynlight)) >> 8, (uint32_t)255);
uint32_t b = MIN((BPART(fg) * BPART(dynlight)) >> 8, (uint32_t)255);
fragcolor[x] = MAKEARGB(a, r, g, b);
}
}
}