mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-23 04:22:34 +00:00
- Implement the affine part of the dynamic lights in the softpoly TC SSE2 drawer
This commit is contained in:
parent
94a33229cc
commit
ca2ef805b8
1 changed file with 49 additions and 17 deletions
|
@ -142,7 +142,14 @@ namespace TriScreenDrawerModes
|
|||
}
|
||||
}
|
||||
|
||||
FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
|
||||
// Blends a precomputed dynamic light term into already shaded colors.
//
// All three vectors are processed as eight 16-bit lanes:
//   material - the unshaded color (Shade32 passes the fgcolor it received on entry)
//   fgcolor  - the color after regular shading
//   dynlight - per-lane light factor; the product with material is divided
//              by 256 (right shift by 8) before being added
//
// Returns fgcolor + (material * dynlight) / 256, with every lane capped at 255.
// NOTE(review): presumably the lanes hold two pixels with four channels each,
// as suggested by how the callers unpack ifgcolor - confirm against the drawer.
FORCEINLINE __m128i VECTORCALL AddLights(__m128i material, __m128i fgcolor, __m128i dynlight)
{
	// Light contribution of the material, scaled back down to the 0..255 range.
	const __m128i lit_material = _mm_srli_epi16(_mm_mullo_epi16(material, dynlight), 8);

	// Accumulate on top of the shaded color and saturate each lane at 255.
	const __m128i combined = _mm_add_epi16(fgcolor, lit_material);
	const __m128i channel_max = _mm_set1_epi16(255);
	return _mm_min_epi16(combined, channel_max);
}
|
||||
|
||||
FORCEINLINE __m128i VECTORCALL CalcDynamicLight(const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
|
||||
{
|
||||
__m128i lit = _mm_setzero_si128();
|
||||
|
||||
|
@ -189,15 +196,11 @@ namespace TriScreenDrawerModes
|
|||
lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8));
|
||||
}
|
||||
|
||||
lit = _mm_min_epi16(lit, _mm_set1_epi16(256));
|
||||
|
||||
fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8));
|
||||
fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255));
|
||||
return fgcolor;
|
||||
return _mm_min_epi16(lit, _mm_set1_epi16(256));
|
||||
}
|
||||
|
||||
template<typename ShadeModeT>
|
||||
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const PolyLight *lights, int num_lights, __m128 worldpos, __m128 worldnormal)
|
||||
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, __m128i dynlight)
|
||||
{
|
||||
__m128i material = fgcolor;
|
||||
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
|
||||
|
@ -224,7 +227,7 @@ namespace TriScreenDrawerModes
|
|||
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
|
||||
}
|
||||
|
||||
return AddLights(material, fgcolor, lights, num_lights, worldpos, worldnormal);
|
||||
return AddLights(material, fgcolor, dynlight);
|
||||
}
|
||||
|
||||
template<typename BlendT>
|
||||
|
@ -391,7 +394,6 @@ private:
|
|||
|
||||
auto lights = args->uniforms->Lights();
|
||||
auto num_lights = args->uniforms->NumLights();
|
||||
__m128 worldpos = _mm_setzero_ps();
|
||||
__m128 worldnormal = _mm_setzero_ps();
|
||||
|
||||
// Calculate gradients
|
||||
|
@ -472,7 +474,8 @@ private:
|
|||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
|
@ -492,6 +495,13 @@ private:
|
|||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
|
||||
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff,0xffff,0,0))), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
|
||||
|
||||
for (int ix = 0; ix < 4; ix++)
|
||||
{
|
||||
// Load bgcolor
|
||||
|
@ -535,11 +545,13 @@ private:
|
|||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
|
||||
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
}
|
||||
|
||||
blockPosY.W += gradientY.W;
|
||||
|
@ -565,7 +577,8 @@ private:
|
|||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
|
@ -585,6 +598,13 @@ private:
|
|||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
|
||||
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
|
||||
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
// Load bgcolor
|
||||
|
@ -633,7 +653,7 @@ private:
|
|||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
|
@ -641,6 +661,8 @@ private:
|
|||
if (mask0 & (1 << 31)) dest[x * 2] = desttmp[0];
|
||||
if (mask0 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];
|
||||
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
|
||||
mask0 <<= 2;
|
||||
}
|
||||
|
||||
|
@ -665,7 +687,8 @@ private:
|
|||
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
|
||||
|
||||
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
|
||||
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
|
||||
ScreenTriangleStepVariables blockPosX = blockPosY;
|
||||
blockPosX.W += gradientX.W;
|
||||
|
@ -685,6 +708,13 @@ private:
|
|||
fixed_t lightstep = (lightnext - lightpos) / 8;
|
||||
lightstep = lightstep & lightmask;
|
||||
|
||||
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
|
||||
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
|
||||
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal);
|
||||
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
|
||||
|
||||
for (int x = 0; x < 4; x++)
|
||||
{
|
||||
// Load bgcolor
|
||||
|
@ -733,7 +763,7 @@ private:
|
|||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, lights, num_lights, worldpos, worldnormal);
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
|
@ -741,6 +771,8 @@ private:
|
|||
if (mask1 & (1 << 31)) dest[x * 2] = desttmp[0];
|
||||
if (mask1 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];
|
||||
|
||||
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
|
||||
|
||||
mask1 <<= 2;
|
||||
}
|
||||
|
||||
|
@ -892,7 +924,7 @@ private:
|
|||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, nullptr, 0, _mm_setzero_ps(), _mm_setzero_ps());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128());
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
|
@ -920,7 +952,7 @@ private:
|
|||
|
||||
// Shade and blend
|
||||
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, nullptr, 0, _mm_setzero_ps(), _mm_setzero_ps());
|
||||
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, _mm_setzero_si128());
|
||||
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
|
||||
|
||||
// Store result
|
||||
|
|
Loading…
Reference in a new issue