mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 15:21:51 +00:00
Merge remote-tracking branch 'origin/master' into asmjit
This commit is contained in:
commit
d195efc431
2 changed files with 278 additions and 299 deletions
|
@ -83,7 +83,7 @@ public:
|
|||
float worldposZ[MAXWIDTH];
|
||||
uint32_t texel[MAXWIDTH];
|
||||
int32_t texelV[MAXWIDTH];
|
||||
fixed_t lightarray[MAXWIDTH];
|
||||
uint16_t lightarray[MAXWIDTH];
|
||||
uint32_t dynlights[MAXWIDTH];
|
||||
|
||||
static PolyTriangleThreadData *Get(DrawerThread *thread);
|
||||
|
|
|
@ -378,23 +378,16 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
|
|||
}
|
||||
}
|
||||
|
||||
template<typename ModeT, typename OptT>
|
||||
void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
|
||||
template<typename ModeT, typename OptT, int BitsPerPixel>
|
||||
void StepSpan(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
|
||||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
uint32_t fixedlight;
|
||||
uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate;
|
||||
fixed_t fuzzscale;
|
||||
int _fuzzpos;
|
||||
const uint32_t *texPixels, *translation;
|
||||
int texWidth, texHeight;
|
||||
uint32_t fillcolor;
|
||||
int actoralpha;
|
||||
float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
|
||||
float startX, startY;
|
||||
float stepW, stepU, stepV, stepWorldX, stepWorldY, stepWorldZ;
|
||||
float posW, posU, posV, posWorldX, posWorldY, posWorldZ;
|
||||
int texWidth, texHeight;
|
||||
uint32_t light;
|
||||
fixed_t shade, lightpos, lightstep;
|
||||
|
||||
|
@ -403,31 +396,9 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
float *worldposZ = thread->worldposZ;
|
||||
uint32_t *texel = thread->texel;
|
||||
int32_t *texelV = thread->texelV;
|
||||
fixed_t *lightarray = thread->lightarray;
|
||||
uint16_t *lightarray = thread->lightarray;
|
||||
uint32_t *dynlights = thread->dynlights;
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
texPixels = (const uint32_t*)args->uniforms->TexturePixels();
|
||||
texWidth = args->uniforms->TextureWidth();
|
||||
texHeight = args->uniforms->TextureHeight();
|
||||
}
|
||||
|
||||
if (ModeT::SWFlags & SWSTYLEF_Translated)
|
||||
{
|
||||
translation = (const uint32_t*)args->uniforms->Translation();
|
||||
}
|
||||
|
||||
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
|
||||
{
|
||||
fillcolor = args->uniforms->Color();
|
||||
}
|
||||
|
||||
if (!(ModeT::Flags & STYLEF_Alpha1))
|
||||
{
|
||||
actoralpha = args->uniforms->Alpha();
|
||||
}
|
||||
|
||||
v1X = args->v1->x;
|
||||
v1Y = args->v1->y;
|
||||
v1W = args->v1->w;
|
||||
|
@ -442,6 +413,12 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
posU = v1U + stepU * startX + args->gradientY.U * startY;
|
||||
posV = v1V + stepV * startX + args->gradientY.V * startY;
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
texWidth = args->uniforms->TextureWidth();
|
||||
texHeight = args->uniforms->TextureHeight();
|
||||
}
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
v1WorldX = args->v1->worldX * v1W;
|
||||
|
@ -463,8 +440,128 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
|
||||
lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
|
||||
lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
|
||||
|
||||
fixed_t maxvis = 24 * FRACUNIT / 32;
|
||||
fixed_t maxlight = 31 * FRACUNIT / 32;
|
||||
|
||||
fixed_t lightend = lightpos + lightstep * (x1 - x0);
|
||||
if (lightpos < maxvis && shade >= lightpos && shade - lightpos <= maxlight &&
|
||||
lightend < maxvis && shade >= lightend && shade - lightend <= maxlight)
|
||||
{
|
||||
if (BitsPerPixel == 32)
|
||||
{
|
||||
lightpos += FRACUNIT - shade;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = lightpos >> 8;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
lightpos = shade - lightpos;
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (lightpos >> 3) & 0xffffff00;
|
||||
lightpos -= lightstep;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (BitsPerPixel == 32)
|
||||
{
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 3) & 0xffffff00;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NO_SSE
|
||||
__m128 mposW, mposU, mposV, mstepW, mstepU, mstepV;
|
||||
__m128 mposWorldX, mposWorldY, mposWorldZ, mstepWorldX, mstepWorldY, mstepWorldZ;
|
||||
__m128i mtexMul1, mtexMul2;
|
||||
|
||||
#define SETUP_STEP_SSE(mpos,mstep,pos,step) \
|
||||
mstep = _mm_load_ss(&step); \
|
||||
mpos = _mm_load_ss(&pos); \
|
||||
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||
mpos = _mm_add_ss(mpos, mstep); \
|
||||
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||
mpos = _mm_add_ss(mpos, mstep); \
|
||||
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
|
||||
mpos = _mm_add_ss(mpos, mstep); \
|
||||
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(0, 1, 2, 3)); \
|
||||
mstep = _mm_mul_ss(mstep, _mm_set1_ps(4.0f)); \
|
||||
mstep = _mm_shuffle_ps(mstep, mstep, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
SETUP_STEP_SSE(mposW, mstepW, posW, stepW);
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
SETUP_STEP_SSE(mposWorldX, mstepWorldX, posWorldX, stepWorldX);
|
||||
SETUP_STEP_SSE(mposWorldY, mstepWorldY, posWorldY, stepWorldY);
|
||||
SETUP_STEP_SSE(mposWorldZ, mstepWorldZ, posWorldZ, stepWorldZ);
|
||||
}
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
SETUP_STEP_SSE(mposU, mstepU, posU, stepU);
|
||||
SETUP_STEP_SSE(mposV, mstepV, posV, stepV);
|
||||
|
||||
mtexMul1 = _mm_setr_epi16(texWidth, texWidth, texWidth, texWidth, texHeight, texHeight, texHeight, texHeight);
|
||||
mtexMul2 = _mm_setr_epi16(texHeight, texHeight, texHeight, texHeight, 1, 1, 1, 1);
|
||||
}
|
||||
|
||||
#undef SETUP_STEP_SSE
|
||||
|
||||
for (int x = x0; x < x1; x += 4)
|
||||
{
|
||||
__m128 rcp_posW = _mm_rcp_ps(mposW);
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
_mm_storeu_ps(&worldposX[x], _mm_mul_ps(mposWorldX, rcp_posW));
|
||||
_mm_storeu_ps(&worldposY[x], _mm_mul_ps(mposWorldY, rcp_posW));
|
||||
_mm_storeu_ps(&worldposZ[x], _mm_mul_ps(mposWorldZ, rcp_posW));
|
||||
mposWorldX = _mm_add_ps(mposWorldX, mstepWorldX);
|
||||
mposWorldY = _mm_add_ps(mposWorldY, mstepWorldY);
|
||||
mposWorldZ = _mm_add_ps(mposWorldZ, mstepWorldZ);
|
||||
}
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
__m128 rcpW = _mm_mul_ps(_mm_set1_ps(0x01000000), rcp_posW);
|
||||
__m128i u = _mm_cvtps_epi32(_mm_mul_ps(mposU, rcpW));
|
||||
__m128i v = _mm_cvtps_epi32(_mm_mul_ps(mposV, rcpW));
|
||||
_mm_storeu_si128((__m128i*)&texelV[x], v);
|
||||
|
||||
__m128i texelX = _mm_srli_epi32(_mm_slli_epi32(u, 8), 17);
|
||||
__m128i texelY = _mm_srli_epi32(_mm_slli_epi32(v, 8), 17);
|
||||
__m128i texelXY = _mm_mulhi_epu16(_mm_slli_epi16(_mm_packs_epi32(texelX, texelY), 1), mtexMul1);
|
||||
__m128i texlo = _mm_mullo_epi16(texelXY, mtexMul2);
|
||||
__m128i texhi = _mm_mulhi_epi16(texelXY, mtexMul2);
|
||||
texelX = _mm_unpacklo_epi16(texlo, texhi);
|
||||
texelY = _mm_unpackhi_epi16(texlo, texhi);
|
||||
_mm_storeu_si128((__m128i*)&texel[x], _mm_add_epi32(texelX, texelY));
|
||||
|
||||
mposU = _mm_add_ps(mposU, mstepU);
|
||||
mposV = _mm_add_ps(mposV, mstepV);
|
||||
}
|
||||
|
||||
mposW = _mm_add_ps(mposW, mstepW);
|
||||
}
|
||||
#else
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
|
@ -477,15 +574,6 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
posWorldY += stepWorldY;
|
||||
posWorldZ += stepWorldZ;
|
||||
}
|
||||
|
||||
if (!(OptT::Flags & SWOPT_FixedLight))
|
||||
{
|
||||
fixed_t maxvis = 24 * FRACUNIT / 32;
|
||||
fixed_t maxlight = 31 * FRACUNIT / 32;
|
||||
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
|
||||
lightpos += lightstep;
|
||||
}
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
float rcpW = 0x01000000 / posW;
|
||||
|
@ -501,6 +589,7 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
|
||||
posW += stepW;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
|
@ -539,6 +628,61 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
worldnormalZ *= rcplen;
|
||||
}
|
||||
|
||||
#ifndef NO_SSE
|
||||
__m128 mworldnormalX = _mm_set1_ps(worldnormalX);
|
||||
__m128 mworldnormalY = _mm_set1_ps(worldnormalY);
|
||||
__m128 mworldnormalZ = _mm_set1_ps(worldnormalZ);
|
||||
for (int x = x0; x < x1; x += 4)
|
||||
{
|
||||
__m128i litlo = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
|
||||
__m128i lithi = litlo;
|
||||
|
||||
for (int i = 0; i < num_lights; i++)
|
||||
{
|
||||
__m128 lightposX = _mm_set1_ps(lights[i].x);
|
||||
__m128 lightposY = _mm_set1_ps(lights[i].y);
|
||||
__m128 lightposZ = _mm_set1_ps(lights[i].z);
|
||||
__m128 light_radius = _mm_set1_ps(lights[i].radius);
|
||||
__m128i light_color = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(lights[i].color), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
|
||||
|
||||
__m128 is_attenuated = _mm_cmplt_ps(light_radius, _mm_setzero_ps());
|
||||
light_radius = _mm_andnot_ps(_mm_set1_ps(-0.0f), light_radius); // clear sign bit
|
||||
|
||||
// L = light-pos
|
||||
// dist = sqrt(dot(L, L))
|
||||
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
__m128 Lx = _mm_sub_ps(lightposX, _mm_loadu_ps(&worldposX[x]));
|
||||
__m128 Ly = _mm_sub_ps(lightposY, _mm_loadu_ps(&worldposY[x]));
|
||||
__m128 Lz = _mm_sub_ps(lightposZ, _mm_loadu_ps(&worldposZ[x]));
|
||||
__m128 dist2 = _mm_add_ps(_mm_mul_ps(Lx, Lx), _mm_add_ps(_mm_mul_ps(Ly, Ly), _mm_mul_ps(Lz, Lz)));
|
||||
__m128 rcp_dist = _mm_rsqrt_ps(dist2);
|
||||
__m128 dist = _mm_mul_ps(dist2, rcp_dist);
|
||||
__m128 distance_attenuation = _mm_sub_ps(_mm_set1_ps(256.0f), _mm_min_ps(_mm_mul_ps(dist, light_radius), _mm_set1_ps(256.0f)));
|
||||
|
||||
// The simple light type
|
||||
__m128 simple_attenuation = distance_attenuation;
|
||||
|
||||
// The point light type
|
||||
// diffuse = max(dot(N,normalize(L)),0) * attenuation
|
||||
Lx = _mm_mul_ps(Lx, rcp_dist);
|
||||
Ly = _mm_mul_ps(Ly, rcp_dist);
|
||||
Lz = _mm_mul_ps(Lz, rcp_dist);
|
||||
__m128 dotNL = _mm_add_ps(_mm_add_ps(_mm_mul_ps(mworldnormalX, Lx), _mm_mul_ps(mworldnormalY, Ly)), _mm_mul_ps(mworldnormalZ, Lz));
|
||||
__m128 point_attenuation = _mm_mul_ps(_mm_max_ps(dotNL, _mm_setzero_ps()), distance_attenuation);
|
||||
|
||||
__m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, point_attenuation), _mm_andnot_ps(is_attenuated, simple_attenuation)));
|
||||
|
||||
attenuation = _mm_shufflehi_epi16(_mm_shufflelo_epi16(attenuation, _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(2, 2, 0, 0));
|
||||
__m128i attenlo = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
__m128i attenhi = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
|
||||
litlo = _mm_add_epi16(litlo, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenlo), 8));
|
||||
lithi = _mm_add_epi16(lithi, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenhi), 8));
|
||||
}
|
||||
|
||||
_mm_storeu_si128((__m128i*)&dynlights[x], _mm_packus_epi16(litlo, lithi));
|
||||
}
|
||||
#else
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
uint32_t lit_r = RPART(dynlightcolor);
|
||||
|
@ -596,6 +740,48 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
lit_b = MIN<uint32_t>(lit_b, 255);
|
||||
dynlights[x] = MAKEARGB(255, lit_r, lit_g, lit_b);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
template<typename ModeT, typename OptT>
|
||||
void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
|
||||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
StepSpan<ModeT, OptT, 32>(y, x0, x1, args, thread);
|
||||
|
||||
uint32_t fixedlight;
|
||||
uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate;
|
||||
fixed_t fuzzscale;
|
||||
int _fuzzpos;
|
||||
const uint32_t *texPixels, *translation;
|
||||
uint32_t fillcolor;
|
||||
int actoralpha;
|
||||
|
||||
uint32_t *texel = thread->texel;
|
||||
int32_t *texelV = thread->texelV;
|
||||
uint16_t *lightarray = thread->lightarray;
|
||||
uint32_t *dynlights = thread->dynlights;
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
texPixels = (const uint32_t*)args->uniforms->TexturePixels();
|
||||
}
|
||||
|
||||
if (ModeT::SWFlags & SWSTYLEF_Translated)
|
||||
{
|
||||
translation = (const uint32_t*)args->uniforms->Translation();
|
||||
}
|
||||
|
||||
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
|
||||
{
|
||||
fillcolor = args->uniforms->Color();
|
||||
}
|
||||
|
||||
if (!(ModeT::Flags & STYLEF_Alpha1))
|
||||
{
|
||||
actoralpha = args->uniforms->Alpha();
|
||||
}
|
||||
|
||||
if (OptT::Flags & SWOPT_FixedLight)
|
||||
|
@ -650,7 +836,7 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
|
|||
__m128i mfg = _mm_unpacklo_epi8(_mm_setr_epi32(texPixels[texel[x]], texPixels[texel[x + 1]], 0, 0), _mm_setzero_si128());
|
||||
|
||||
if (!(OptT::Flags & SWOPT_FixedLight))
|
||||
mlightshade = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_loadl_epi64((const __m128i*)&lightarray[x]), _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(1, 1, 0, 0));
|
||||
mlightshade = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_cvtsi32_si128(*(int*)&lightarray[x]), _MM_SHUFFLE(1, 1, 0, 0)), _MM_SHUFFLE(1, 1, 0, 0));
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
|
@ -997,226 +1183,52 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
{
|
||||
using namespace TriScreenDrawerModes;
|
||||
|
||||
float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
|
||||
float startX, startY;
|
||||
float stepW, stepU, stepV, stepWorldX, stepWorldY, stepWorldZ;
|
||||
float posW, posU, posV, posWorldX, posWorldY, posWorldZ;
|
||||
StepSpan<ModeT, OptT, 8>(y, x0, x1, args, thread);
|
||||
|
||||
PolyLight *lights;
|
||||
int num_lights;
|
||||
float worldnormalX, worldnormalY, worldnormalZ;
|
||||
uint32_t dynlightcolor;
|
||||
uint32_t fixedlight, capcolor;
|
||||
fixed_t fuzzscale;
|
||||
int _fuzzpos;
|
||||
const uint8_t *colormaps, *texPixels, *translation;
|
||||
int texWidth, texHeight;
|
||||
uint32_t fillcolor, capcolor;
|
||||
int alpha;
|
||||
uint32_t light;
|
||||
fixed_t shade, lightpos, lightstep;
|
||||
int16_t dynlights_r[MAXWIDTH / 16], dynlights_g[MAXWIDTH / 16], dynlights_b[MAXWIDTH / 16];
|
||||
int16_t posdynlight_r, posdynlight_g, posdynlight_b;
|
||||
fixed_t lightarray[MAXWIDTH / 16];
|
||||
uint32_t fillcolor;
|
||||
int actoralpha;
|
||||
|
||||
v1X = args->v1->x;
|
||||
v1Y = args->v1->y;
|
||||
v1W = args->v1->w;
|
||||
v1U = args->v1->u * v1W;
|
||||
v1V = args->v1->v * v1W;
|
||||
startX = x0 + (0.5f - v1X);
|
||||
startY = y + (0.5f - v1Y);
|
||||
stepW = args->gradientX.W;
|
||||
stepU = args->gradientX.U;
|
||||
stepV = args->gradientX.V;
|
||||
posW = v1W + stepW * startX + args->gradientY.W * startY;
|
||||
posU = v1U + stepU * startX + args->gradientY.U * startY;
|
||||
posV = v1V + stepV * startX + args->gradientY.V * startY;
|
||||
uint32_t *texel = thread->texel;
|
||||
int32_t *texelV = thread->texelV;
|
||||
uint16_t *lightarray = thread->lightarray;
|
||||
uint32_t *dynlights = thread->dynlights;
|
||||
|
||||
texPixels = args->uniforms->TexturePixels();
|
||||
translation = args->uniforms->Translation();
|
||||
texWidth = args->uniforms->TextureWidth();
|
||||
texHeight = args->uniforms->TextureHeight();
|
||||
fillcolor = args->uniforms->Color();
|
||||
alpha = args->uniforms->Alpha();
|
||||
colormaps = args->uniforms->BaseColormap();
|
||||
light = args->uniforms->Light();
|
||||
|
||||
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
|
||||
{
|
||||
texPixels = args->uniforms->TexturePixels();
|
||||
}
|
||||
|
||||
if (ModeT::SWFlags & SWSTYLEF_Translated)
|
||||
{
|
||||
translation = args->uniforms->Translation();
|
||||
}
|
||||
|
||||
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
|
||||
{
|
||||
fillcolor = args->uniforms->Color();
|
||||
}
|
||||
|
||||
if (!(ModeT::Flags & STYLEF_Alpha1))
|
||||
{
|
||||
actoralpha = args->uniforms->Alpha();
|
||||
}
|
||||
|
||||
if (ModeT::SWFlags & SWSTYLEF_Skycap)
|
||||
capcolor = GPalette.BaseColors[fillcolor].d;
|
||||
|
||||
if (OptT::Flags & SWOPT_FixedLight)
|
||||
{
|
||||
light += light >> 7; // 255 -> 256
|
||||
light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00;
|
||||
}
|
||||
else
|
||||
{
|
||||
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
|
||||
|
||||
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
|
||||
lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
|
||||
lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
|
||||
|
||||
int affineOffset = x0 / 16 * 16 - x0;
|
||||
lightpos = lightpos + lightstep * affineOffset;
|
||||
lightstep = lightstep * 16;
|
||||
|
||||
fixed_t maxvis = 24 * FRACUNIT / 32;
|
||||
fixed_t maxlight = 31 * FRACUNIT / 32;
|
||||
|
||||
for (int x = x0 / 16; x <= x1 / 16 + 1; x++)
|
||||
{
|
||||
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 8) << 5;
|
||||
lightpos += lightstep;
|
||||
fixedlight = args->uniforms->Light();
|
||||
fixedlight += fixedlight >> 7; // 255 -> 256
|
||||
fixedlight = ((256 - fixedlight) * NUMCOLORMAPS) & 0xffffff00;
|
||||
}
|
||||
|
||||
int offset = x0 >> 4;
|
||||
int t1 = x0 & 15;
|
||||
int t0 = 16 - t1;
|
||||
lightpos = (lightarray[offset] * t0 + lightarray[offset + 1] * t1);
|
||||
|
||||
for (int x = x0 / 16; x <= x1 / 16; x++)
|
||||
{
|
||||
lightarray[x] = lightarray[x + 1] - lightarray[x];
|
||||
}
|
||||
}
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
v1WorldX = args->v1->worldX * v1W;
|
||||
v1WorldY = args->v1->worldY * v1W;
|
||||
v1WorldZ = args->v1->worldZ * v1W;
|
||||
stepWorldX = args->gradientX.WorldX;
|
||||
stepWorldY = args->gradientX.WorldY;
|
||||
stepWorldZ = args->gradientX.WorldZ;
|
||||
posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY;
|
||||
posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY;
|
||||
posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY;
|
||||
|
||||
lights = args->uniforms->Lights();
|
||||
num_lights = args->uniforms->NumLights();
|
||||
worldnormalX = args->uniforms->Normal().X;
|
||||
worldnormalY = args->uniforms->Normal().Y;
|
||||
worldnormalZ = args->uniforms->Normal().Z;
|
||||
dynlightcolor = args->uniforms->DynLightColor();
|
||||
|
||||
// The normal vector cannot be uniform when drawing models. Calculate and use the face normal:
|
||||
if (worldnormalX == 0.0f && worldnormalY == 0.0f && worldnormalZ == 0.0f)
|
||||
{
|
||||
float dx1 = args->v2->worldX - args->v1->worldX;
|
||||
float dy1 = args->v2->worldY - args->v1->worldY;
|
||||
float dz1 = args->v2->worldZ - args->v1->worldZ;
|
||||
float dx2 = args->v3->worldX - args->v1->worldX;
|
||||
float dy2 = args->v3->worldY - args->v1->worldY;
|
||||
float dz2 = args->v3->worldZ - args->v1->worldZ;
|
||||
worldnormalX = dy1 * dz2 - dz1 * dy2;
|
||||
worldnormalY = dz1 * dx2 - dx1 * dz2;
|
||||
worldnormalZ = dx1 * dy2 - dy1 * dx2;
|
||||
float lensqr = worldnormalX * worldnormalX + worldnormalY * worldnormalY + worldnormalZ * worldnormalZ;
|
||||
#ifndef NO_SSE
|
||||
float rcplen = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(lensqr)));
|
||||
#else
|
||||
float rcplen = 1.0f / sqrt(lensqr);
|
||||
#endif
|
||||
worldnormalX *= rcplen;
|
||||
worldnormalY *= rcplen;
|
||||
worldnormalZ *= rcplen;
|
||||
}
|
||||
|
||||
int affineOffset = x0 / 16 * 16 - x0;
|
||||
float posLightW = posW + stepW * affineOffset;
|
||||
posWorldX = posWorldX + stepWorldX * affineOffset;
|
||||
posWorldY = posWorldY + stepWorldY * affineOffset;
|
||||
posWorldZ = posWorldZ + stepWorldZ * affineOffset;
|
||||
float stepLightW = stepW * 16.0f;
|
||||
stepWorldX *= 16.0f;
|
||||
stepWorldY *= 16.0f;
|
||||
stepWorldZ *= 16.0f;
|
||||
|
||||
for (int x = x0 / 16; x <= x1 / 16 + 1; x++)
|
||||
{
|
||||
uint32_t lit_r = RPART(dynlightcolor);
|
||||
uint32_t lit_g = GPART(dynlightcolor);
|
||||
uint32_t lit_b = BPART(dynlightcolor);
|
||||
|
||||
float rcp_posW = 1.0f / posLightW;
|
||||
float worldposX = posWorldX * rcp_posW;
|
||||
float worldposY = posWorldY * rcp_posW;
|
||||
float worldposZ = posWorldZ * rcp_posW;
|
||||
for (int i = 0; i < num_lights; i++)
|
||||
{
|
||||
float lightposX = lights[i].x;
|
||||
float lightposY = lights[i].y;
|
||||
float lightposZ = lights[i].z;
|
||||
float light_radius = lights[i].radius;
|
||||
uint32_t light_color = lights[i].color;
|
||||
|
||||
bool is_attenuated = light_radius < 0.0f;
|
||||
if (is_attenuated)
|
||||
light_radius = -light_radius;
|
||||
|
||||
// L = light-pos
|
||||
// dist = sqrt(dot(L, L))
|
||||
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
|
||||
float Lx = lightposX - worldposX;
|
||||
float Ly = lightposY - worldposY;
|
||||
float Lz = lightposZ - worldposZ;
|
||||
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
|
||||
#ifdef NO_SSE
|
||||
//float rcp_dist = 1.0f / sqrt(dist2);
|
||||
float rcp_dist = 1.0f / (dist2 * 0.01f);
|
||||
#else
|
||||
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
|
||||
#endif
|
||||
float dist = dist2 * rcp_dist;
|
||||
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
|
||||
|
||||
// The simple light type
|
||||
float simple_attenuation = distance_attenuation;
|
||||
|
||||
// The point light type
|
||||
// diffuse = max(dot(N,normalize(L)),0) * attenuation
|
||||
Lx *= rcp_dist;
|
||||
Ly *= rcp_dist;
|
||||
Lz *= rcp_dist;
|
||||
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
|
||||
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
|
||||
|
||||
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
|
||||
|
||||
lit_r += (RPART(light_color) * attenuation) >> 8;
|
||||
lit_g += (GPART(light_color) * attenuation) >> 8;
|
||||
lit_b += (BPART(light_color) * attenuation) >> 8;
|
||||
}
|
||||
|
||||
lit_r = MIN<uint32_t>(lit_r, 255);
|
||||
lit_g = MIN<uint32_t>(lit_g, 255);
|
||||
lit_b = MIN<uint32_t>(lit_b, 255);
|
||||
dynlights_r[x] = lit_r;
|
||||
dynlights_g[x] = lit_g;
|
||||
dynlights_b[x] = lit_b;
|
||||
|
||||
posLightW += stepLightW;
|
||||
posWorldX += stepWorldX;
|
||||
posWorldY += stepWorldY;
|
||||
posWorldZ += stepWorldZ;
|
||||
}
|
||||
|
||||
int offset = x0 >> 4;
|
||||
int t1 = x0 & 15;
|
||||
int t0 = 16 - t1;
|
||||
posdynlight_r = (dynlights_r[offset] * t0 + dynlights_r[offset + 1] * t1);
|
||||
posdynlight_g = (dynlights_g[offset] * t0 + dynlights_g[offset + 1] * t1);
|
||||
posdynlight_b = (dynlights_b[offset] * t0 + dynlights_b[offset + 1] * t1);
|
||||
|
||||
for (int x = x0 / 16; x <= x1 / 16; x++)
|
||||
{
|
||||
dynlights_r[x] = dynlights_r[x + 1] - dynlights_r[x];
|
||||
dynlights_g[x] = dynlights_g[x + 1] - dynlights_g[x];
|
||||
dynlights_b[x] = dynlights_b[x + 1] - dynlights_b[x];
|
||||
}
|
||||
}
|
||||
|
||||
fixed_t fuzzscale;
|
||||
int _fuzzpos;
|
||||
if (ModeT::BlendOp == STYLEOP_Fuzz)
|
||||
{
|
||||
fuzzscale = (200 << FRACBITS) / viewheight;
|
||||
|
@ -1226,19 +1238,13 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
uint8_t *dest = (uint8_t*)args->dest;
|
||||
uint8_t *destLine = dest + args->pitch * y;
|
||||
|
||||
int x = x0;
|
||||
while (x < x1)
|
||||
for (int x = x0; x < x1; x++)
|
||||
{
|
||||
if (ModeT::BlendOp == STYLEOP_Fuzz)
|
||||
{
|
||||
using namespace swrenderer;
|
||||
|
||||
float rcpW = 0x01000000 / posW;
|
||||
int32_t u = (int32_t)(posU * rcpW);
|
||||
int32_t v = (int32_t)(posV * rcpW);
|
||||
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||
unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0;
|
||||
unsigned int sampleshadeout = (texPixels[texel[x]] != 0) ? 256 : 0;
|
||||
|
||||
int scaled_x = (x * fuzzscale) >> FRACBITS;
|
||||
int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + _fuzzpos;
|
||||
|
@ -1259,12 +1265,8 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
}
|
||||
else if (ModeT::SWFlags & SWSTYLEF_Skycap)
|
||||
{
|
||||
float rcpW = 0x01000000 / posW;
|
||||
int32_t u = (int32_t)(posU * rcpW);
|
||||
int32_t v = (int32_t)(posV * rcpW);
|
||||
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||
int fg = texPixels[texelX * texHeight + texelY];
|
||||
int32_t v = texelV[x];
|
||||
int fg = texPixels[texel[x]];
|
||||
|
||||
int start_fade = 2; // How fast it should fade out
|
||||
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
|
||||
|
@ -1301,12 +1303,11 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
uint8_t shadedfg;
|
||||
if (OptT::Flags & SWOPT_FixedLight)
|
||||
{
|
||||
shadedfg = colormaps[light + fg];
|
||||
shadedfg = colormaps[fixedlight + fg];
|
||||
}
|
||||
else
|
||||
{
|
||||
int lightshade = (lightpos >> 4) & 0xffffff00;
|
||||
shadedfg = colormaps[lightshade + fg];
|
||||
shadedfg = colormaps[lightarray[x] + fg];
|
||||
}
|
||||
|
||||
destLine[x] = shadedfg;
|
||||
|
@ -1320,12 +1321,7 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
}
|
||||
else
|
||||
{
|
||||
float rcpW = 0x01000000 / posW;
|
||||
int32_t u = (int32_t)(posU * rcpW);
|
||||
int32_t v = (int32_t)(posV * rcpW);
|
||||
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
|
||||
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
|
||||
fg = texPixels[texelX * texHeight + texelY];
|
||||
fg = texPixels[texel[x]];
|
||||
}
|
||||
|
||||
int fgalpha = 255;
|
||||
|
@ -1345,7 +1341,7 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
|
||||
if (!(ModeT::Flags & STYLEF_Alpha1))
|
||||
{
|
||||
fgalpha = (fgalpha * alpha) >> 8;
|
||||
fgalpha = (fgalpha * actoralpha) >> 8;
|
||||
}
|
||||
|
||||
if (ModeT::SWFlags & SWSTYLEF_Translated)
|
||||
|
@ -1354,28 +1350,24 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
uint8_t shadedfg;
|
||||
if (OptT::Flags & SWOPT_FixedLight)
|
||||
{
|
||||
shadedfg = colormaps[light + fg];
|
||||
shadedfg = colormaps[fixedlight + fg];
|
||||
}
|
||||
else
|
||||
{
|
||||
int lightshade = (lightpos >> 4) & 0xffffff00;
|
||||
shadedfg = colormaps[lightshade + fg];
|
||||
shadedfg = colormaps[lightarray[x] + fg];
|
||||
}
|
||||
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
if (posdynlight_r | posdynlight_g | posdynlight_b)
|
||||
uint32_t lit = dynlights[x];
|
||||
if (lit & 0x00ffffff)
|
||||
{
|
||||
uint32_t lit_r = posdynlight_r >> 4;
|
||||
uint32_t lit_g = posdynlight_g >> 4;
|
||||
uint32_t lit_b = posdynlight_b >> 4;
|
||||
|
||||
uint32_t fgrgb = GPalette.BaseColors[fg];
|
||||
uint32_t shadedfgrgb = GPalette.BaseColors[shadedfg];
|
||||
|
||||
uint32_t out_r = MIN(((RPART(fgrgb) * lit_r) >> 8) + RPART(shadedfgrgb), (uint32_t)255);
|
||||
uint32_t out_g = MIN(((GPART(fgrgb) * lit_g) >> 8) + GPART(shadedfgrgb), (uint32_t)255);
|
||||
uint32_t out_b = MIN(((BPART(fgrgb) * lit_b) >> 8) + BPART(shadedfgrgb), (uint32_t)255);
|
||||
uint32_t out_r = MIN(((RPART(fgrgb) * RPART(lit)) >> 8) + RPART(shadedfgrgb), (uint32_t)255);
|
||||
uint32_t out_g = MIN(((GPART(fgrgb) * GPART(lit)) >> 8) + GPART(shadedfgrgb), (uint32_t)255);
|
||||
uint32_t out_b = MIN(((BPART(fgrgb) * BPART(lit)) >> 8) + BPART(shadedfgrgb), (uint32_t)255);
|
||||
shadedfg = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
|
||||
}
|
||||
}
|
||||
|
@ -1492,19 +1484,6 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
|
|||
destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
|
||||
}
|
||||
}
|
||||
|
||||
posW += stepW;
|
||||
posU += stepU;
|
||||
posV += stepV;
|
||||
if (OptT::Flags & SWOPT_DynLights)
|
||||
{
|
||||
posdynlight_r += dynlights_r[x >> 4];
|
||||
posdynlight_g += dynlights_g[x >> 4];
|
||||
posdynlight_b += dynlights_b[x >> 4];
|
||||
}
|
||||
if (!(OptT::Flags & SWOPT_FixedLight))
|
||||
lightpos += lightarray[x >> 4];
|
||||
x++;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue