Merge remote-tracking branch 'origin/master' into asmjit

This commit is contained in:
Magnus Norddahl 2018-11-27 15:38:10 +01:00
commit d195efc431
2 changed files with 278 additions and 299 deletions

View file

@ -83,7 +83,7 @@ public:
float worldposZ[MAXWIDTH];
uint32_t texel[MAXWIDTH];
int32_t texelV[MAXWIDTH];
fixed_t lightarray[MAXWIDTH];
uint16_t lightarray[MAXWIDTH];
uint32_t dynlights[MAXWIDTH];
static PolyTriangleThreadData *Get(DrawerThread *thread);

View file

@ -378,23 +378,16 @@ void ScreenTriangle::Draw(const TriDrawTriangleArgs *args, PolyTriangleThreadDat
}
}
template<typename ModeT, typename OptT>
void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
template<typename ModeT, typename OptT, int BitsPerPixel>
void StepSpan(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
{
using namespace TriScreenDrawerModes;
uint32_t fixedlight;
uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate;
fixed_t fuzzscale;
int _fuzzpos;
const uint32_t *texPixels, *translation;
int texWidth, texHeight;
uint32_t fillcolor;
int actoralpha;
float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
float startX, startY;
float stepW, stepU, stepV, stepWorldX, stepWorldY, stepWorldZ;
float posW, posU, posV, posWorldX, posWorldY, posWorldZ;
int texWidth, texHeight;
uint32_t light;
fixed_t shade, lightpos, lightstep;
@ -403,31 +396,9 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
float *worldposZ = thread->worldposZ;
uint32_t *texel = thread->texel;
int32_t *texelV = thread->texelV;
fixed_t *lightarray = thread->lightarray;
uint16_t *lightarray = thread->lightarray;
uint32_t *dynlights = thread->dynlights;
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
texPixels = (const uint32_t*)args->uniforms->TexturePixels();
texWidth = args->uniforms->TextureWidth();
texHeight = args->uniforms->TextureHeight();
}
if (ModeT::SWFlags & SWSTYLEF_Translated)
{
translation = (const uint32_t*)args->uniforms->Translation();
}
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
{
fillcolor = args->uniforms->Color();
}
if (!(ModeT::Flags & STYLEF_Alpha1))
{
actoralpha = args->uniforms->Alpha();
}
v1X = args->v1->x;
v1Y = args->v1->y;
v1W = args->v1->w;
@ -442,6 +413,12 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
posU = v1U + stepU * startX + args->gradientY.U * startY;
posV = v1V + stepV * startX + args->gradientY.V * startY;
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
texWidth = args->uniforms->TextureWidth();
texHeight = args->uniforms->TextureHeight();
}
if (OptT::Flags & SWOPT_DynLights)
{
v1WorldX = args->v1->worldX * v1W;
@ -463,8 +440,128 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
fixed_t lightend = lightpos + lightstep * (x1 - x0);
if (lightpos < maxvis && shade >= lightpos && shade - lightpos <= maxlight &&
lightend < maxvis && shade >= lightend && shade - lightend <= maxlight)
{
if (BitsPerPixel == 32)
{
lightpos += FRACUNIT - shade;
for (int x = x0; x < x1; x++)
{
lightarray[x] = lightpos >> 8;
lightpos += lightstep;
}
}
else
{
lightpos = shade - lightpos;
for (int x = x0; x < x1; x++)
{
lightarray[x] = (lightpos >> 3) & 0xffffff00;
lightpos -= lightstep;
}
}
}
else
{
if (BitsPerPixel == 32)
{
for (int x = x0; x < x1; x++)
{
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
lightpos += lightstep;
}
}
else
{
for (int x = x0; x < x1; x++)
{
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 3) & 0xffffff00;
lightpos += lightstep;
}
}
}
}
#ifndef NO_SSE
__m128 mposW, mposU, mposV, mstepW, mstepU, mstepV;
__m128 mposWorldX, mposWorldY, mposWorldZ, mstepWorldX, mstepWorldY, mstepWorldZ;
__m128i mtexMul1, mtexMul2;
#define SETUP_STEP_SSE(mpos,mstep,pos,step) \
mstep = _mm_load_ss(&step); \
mpos = _mm_load_ss(&pos); \
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
mpos = _mm_add_ss(mpos, mstep); \
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
mpos = _mm_add_ss(mpos, mstep); \
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(2, 1, 0, 0)); \
mpos = _mm_add_ss(mpos, mstep); \
mpos = _mm_shuffle_ps(mpos, mpos, _MM_SHUFFLE(0, 1, 2, 3)); \
mstep = _mm_mul_ss(mstep, _mm_set1_ps(4.0f)); \
mstep = _mm_shuffle_ps(mstep, mstep, _MM_SHUFFLE(0, 0, 0, 0));
SETUP_STEP_SSE(mposW, mstepW, posW, stepW);
if (OptT::Flags & SWOPT_DynLights)
{
SETUP_STEP_SSE(mposWorldX, mstepWorldX, posWorldX, stepWorldX);
SETUP_STEP_SSE(mposWorldY, mstepWorldY, posWorldY, stepWorldY);
SETUP_STEP_SSE(mposWorldZ, mstepWorldZ, posWorldZ, stepWorldZ);
}
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
SETUP_STEP_SSE(mposU, mstepU, posU, stepU);
SETUP_STEP_SSE(mposV, mstepV, posV, stepV);
mtexMul1 = _mm_setr_epi16(texWidth, texWidth, texWidth, texWidth, texHeight, texHeight, texHeight, texHeight);
mtexMul2 = _mm_setr_epi16(texHeight, texHeight, texHeight, texHeight, 1, 1, 1, 1);
}
#undef SETUP_STEP_SSE
for (int x = x0; x < x1; x += 4)
{
__m128 rcp_posW = _mm_rcp_ps(mposW);
if (OptT::Flags & SWOPT_DynLights)
{
_mm_storeu_ps(&worldposX[x], _mm_mul_ps(mposWorldX, rcp_posW));
_mm_storeu_ps(&worldposY[x], _mm_mul_ps(mposWorldY, rcp_posW));
_mm_storeu_ps(&worldposZ[x], _mm_mul_ps(mposWorldZ, rcp_posW));
mposWorldX = _mm_add_ps(mposWorldX, mstepWorldX);
mposWorldY = _mm_add_ps(mposWorldY, mstepWorldY);
mposWorldZ = _mm_add_ps(mposWorldZ, mstepWorldZ);
}
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
__m128 rcpW = _mm_mul_ps(_mm_set1_ps(0x01000000), rcp_posW);
__m128i u = _mm_cvtps_epi32(_mm_mul_ps(mposU, rcpW));
__m128i v = _mm_cvtps_epi32(_mm_mul_ps(mposV, rcpW));
_mm_storeu_si128((__m128i*)&texelV[x], v);
__m128i texelX = _mm_srli_epi32(_mm_slli_epi32(u, 8), 17);
__m128i texelY = _mm_srli_epi32(_mm_slli_epi32(v, 8), 17);
__m128i texelXY = _mm_mulhi_epu16(_mm_slli_epi16(_mm_packs_epi32(texelX, texelY), 1), mtexMul1);
__m128i texlo = _mm_mullo_epi16(texelXY, mtexMul2);
__m128i texhi = _mm_mulhi_epi16(texelXY, mtexMul2);
texelX = _mm_unpacklo_epi16(texlo, texhi);
texelY = _mm_unpackhi_epi16(texlo, texhi);
_mm_storeu_si128((__m128i*)&texel[x], _mm_add_epi32(texelX, texelY));
mposU = _mm_add_ps(mposU, mstepU);
mposV = _mm_add_ps(mposV, mstepV);
}
mposW = _mm_add_ps(mposW, mstepW);
}
#else
for (int x = x0; x < x1; x++)
{
if (OptT::Flags & SWOPT_DynLights)
@ -477,15 +574,6 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
posWorldY += stepWorldY;
posWorldZ += stepWorldZ;
}
if (!(OptT::Flags & SWOPT_FixedLight))
{
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
lightarray[x] = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
lightpos += lightstep;
}
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
float rcpW = 0x01000000 / posW;
@ -501,6 +589,7 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
posW += stepW;
}
#endif
if (OptT::Flags & SWOPT_DynLights)
{
@ -539,6 +628,61 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
worldnormalZ *= rcplen;
}
#ifndef NO_SSE
__m128 mworldnormalX = _mm_set1_ps(worldnormalX);
__m128 mworldnormalY = _mm_set1_ps(worldnormalY);
__m128 mworldnormalZ = _mm_set1_ps(worldnormalZ);
for (int x = x0; x < x1; x += 4)
{
__m128i litlo = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dynlightcolor), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
__m128i lithi = litlo;
for (int i = 0; i < num_lights; i++)
{
__m128 lightposX = _mm_set1_ps(lights[i].x);
__m128 lightposY = _mm_set1_ps(lights[i].y);
__m128 lightposZ = _mm_set1_ps(lights[i].z);
__m128 light_radius = _mm_set1_ps(lights[i].radius);
__m128i light_color = _mm_shuffle_epi32(_mm_unpacklo_epi8(_mm_cvtsi32_si128(lights[i].color), _mm_setzero_si128()), _MM_SHUFFLE(1, 0, 1, 0));
__m128 is_attenuated = _mm_cmplt_ps(light_radius, _mm_setzero_ps());
light_radius = _mm_andnot_ps(_mm_set1_ps(-0.0f), light_radius); // clear sign bit
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
__m128 Lx = _mm_sub_ps(lightposX, _mm_loadu_ps(&worldposX[x]));
__m128 Ly = _mm_sub_ps(lightposY, _mm_loadu_ps(&worldposY[x]));
__m128 Lz = _mm_sub_ps(lightposZ, _mm_loadu_ps(&worldposZ[x]));
__m128 dist2 = _mm_add_ps(_mm_mul_ps(Lx, Lx), _mm_add_ps(_mm_mul_ps(Ly, Ly), _mm_mul_ps(Lz, Lz)));
__m128 rcp_dist = _mm_rsqrt_ps(dist2);
__m128 dist = _mm_mul_ps(dist2, rcp_dist);
__m128 distance_attenuation = _mm_sub_ps(_mm_set1_ps(256.0f), _mm_min_ps(_mm_mul_ps(dist, light_radius), _mm_set1_ps(256.0f)));
// The simple light type
__m128 simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx = _mm_mul_ps(Lx, rcp_dist);
Ly = _mm_mul_ps(Ly, rcp_dist);
Lz = _mm_mul_ps(Lz, rcp_dist);
__m128 dotNL = _mm_add_ps(_mm_add_ps(_mm_mul_ps(mworldnormalX, Lx), _mm_mul_ps(mworldnormalY, Ly)), _mm_mul_ps(mworldnormalZ, Lz));
__m128 point_attenuation = _mm_mul_ps(_mm_max_ps(dotNL, _mm_setzero_ps()), distance_attenuation);
__m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, point_attenuation), _mm_andnot_ps(is_attenuated, simple_attenuation)));
attenuation = _mm_shufflehi_epi16(_mm_shufflelo_epi16(attenuation, _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(2, 2, 0, 0));
__m128i attenlo = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 0, 0));
__m128i attenhi = _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(3, 3, 2, 2));
litlo = _mm_add_epi16(litlo, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenlo), 8));
lithi = _mm_add_epi16(lithi, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenhi), 8));
}
_mm_storeu_si128((__m128i*)&dynlights[x], _mm_packus_epi16(litlo, lithi));
}
#else
for (int x = x0; x < x1; x++)
{
uint32_t lit_r = RPART(dynlightcolor);
@ -596,6 +740,48 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
lit_b = MIN<uint32_t>(lit_b, 255);
dynlights[x] = MAKEARGB(255, lit_r, lit_g, lit_b);
}
#endif
}
}
template<typename ModeT, typename OptT>
void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTriangleThreadData *thread)
{
using namespace TriScreenDrawerModes;
StepSpan<ModeT, OptT, 32>(y, x0, x1, args, thread);
uint32_t fixedlight;
uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate;
fixed_t fuzzscale;
int _fuzzpos;
const uint32_t *texPixels, *translation;
uint32_t fillcolor;
int actoralpha;
uint32_t *texel = thread->texel;
int32_t *texelV = thread->texelV;
uint16_t *lightarray = thread->lightarray;
uint32_t *dynlights = thread->dynlights;
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
texPixels = (const uint32_t*)args->uniforms->TexturePixels();
}
if (ModeT::SWFlags & SWSTYLEF_Translated)
{
translation = (const uint32_t*)args->uniforms->Translation();
}
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
{
fillcolor = args->uniforms->Color();
}
if (!(ModeT::Flags & STYLEF_Alpha1))
{
actoralpha = args->uniforms->Alpha();
}
if (OptT::Flags & SWOPT_FixedLight)
@ -650,7 +836,7 @@ void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyT
__m128i mfg = _mm_unpacklo_epi8(_mm_setr_epi32(texPixels[texel[x]], texPixels[texel[x + 1]], 0, 0), _mm_setzero_si128());
if (!(OptT::Flags & SWOPT_FixedLight))
mlightshade = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_loadl_epi64((const __m128i*)&lightarray[x]), _MM_SHUFFLE(2, 2, 0, 0)), _MM_SHUFFLE(1, 1, 0, 0));
mlightshade = _mm_shuffle_epi32(_mm_shufflelo_epi16(_mm_cvtsi32_si128(*(int*)&lightarray[x]), _MM_SHUFFLE(1, 1, 0, 0)), _MM_SHUFFLE(1, 1, 0, 0));
if (OptT::Flags & SWOPT_DynLights)
{
@ -997,226 +1183,52 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
{
using namespace TriScreenDrawerModes;
float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
float startX, startY;
float stepW, stepU, stepV, stepWorldX, stepWorldY, stepWorldZ;
float posW, posU, posV, posWorldX, posWorldY, posWorldZ;
StepSpan<ModeT, OptT, 8>(y, x0, x1, args, thread);
PolyLight *lights;
int num_lights;
float worldnormalX, worldnormalY, worldnormalZ;
uint32_t dynlightcolor;
uint32_t fixedlight, capcolor;
fixed_t fuzzscale;
int _fuzzpos;
const uint8_t *colormaps, *texPixels, *translation;
int texWidth, texHeight;
uint32_t fillcolor, capcolor;
int alpha;
uint32_t light;
fixed_t shade, lightpos, lightstep;
int16_t dynlights_r[MAXWIDTH / 16], dynlights_g[MAXWIDTH / 16], dynlights_b[MAXWIDTH / 16];
int16_t posdynlight_r, posdynlight_g, posdynlight_b;
fixed_t lightarray[MAXWIDTH / 16];
uint32_t fillcolor;
int actoralpha;
v1X = args->v1->x;
v1Y = args->v1->y;
v1W = args->v1->w;
v1U = args->v1->u * v1W;
v1V = args->v1->v * v1W;
startX = x0 + (0.5f - v1X);
startY = y + (0.5f - v1Y);
stepW = args->gradientX.W;
stepU = args->gradientX.U;
stepV = args->gradientX.V;
posW = v1W + stepW * startX + args->gradientY.W * startY;
posU = v1U + stepU * startX + args->gradientY.U * startY;
posV = v1V + stepV * startX + args->gradientY.V * startY;
uint32_t *texel = thread->texel;
int32_t *texelV = thread->texelV;
uint16_t *lightarray = thread->lightarray;
uint32_t *dynlights = thread->dynlights;
texPixels = args->uniforms->TexturePixels();
translation = args->uniforms->Translation();
texWidth = args->uniforms->TextureWidth();
texHeight = args->uniforms->TextureHeight();
fillcolor = args->uniforms->Color();
alpha = args->uniforms->Alpha();
colormaps = args->uniforms->BaseColormap();
light = args->uniforms->Light();
if (!(ModeT::SWFlags & SWSTYLEF_Fill) && !(ModeT::SWFlags & SWSTYLEF_FogBoundary))
{
texPixels = args->uniforms->TexturePixels();
}
if (ModeT::SWFlags & SWSTYLEF_Translated)
{
translation = args->uniforms->Translation();
}
if ((ModeT::SWFlags & SWSTYLEF_Fill) || (ModeT::SWFlags & SWSTYLEF_Skycap) || (ModeT::Flags & STYLEF_ColorIsFixed))
{
fillcolor = args->uniforms->Color();
}
if (!(ModeT::Flags & STYLEF_Alpha1))
{
actoralpha = args->uniforms->Alpha();
}
if (ModeT::SWFlags & SWSTYLEF_Skycap)
capcolor = GPalette.BaseColors[fillcolor].d;
if (OptT::Flags & SWOPT_FixedLight)
{
light += light >> 7; // 255 -> 256
light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00;
}
else
{
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
lightpos = (fixed_t)(globVis * posW * (float)FRACUNIT);
lightstep = (fixed_t)(globVis * stepW * (float)FRACUNIT);
int affineOffset = x0 / 16 * 16 - x0;
lightpos = lightpos + lightstep * affineOffset;
lightstep = lightstep * 16;
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
for (int x = x0 / 16; x <= x1 / 16 + 1; x++)
{
lightarray[x] = (clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight) >> 8) << 5;
lightpos += lightstep;
}
int offset = x0 >> 4;
int t1 = x0 & 15;
int t0 = 16 - t1;
lightpos = (lightarray[offset] * t0 + lightarray[offset + 1] * t1);
for (int x = x0 / 16; x <= x1 / 16; x++)
{
lightarray[x] = lightarray[x + 1] - lightarray[x];
}
fixedlight = args->uniforms->Light();
fixedlight += fixedlight >> 7; // 255 -> 256
fixedlight = ((256 - fixedlight) * NUMCOLORMAPS) & 0xffffff00;
}
if (OptT::Flags & SWOPT_DynLights)
{
v1WorldX = args->v1->worldX * v1W;
v1WorldY = args->v1->worldY * v1W;
v1WorldZ = args->v1->worldZ * v1W;
stepWorldX = args->gradientX.WorldX;
stepWorldY = args->gradientX.WorldY;
stepWorldZ = args->gradientX.WorldZ;
posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY;
posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY;
posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY;
lights = args->uniforms->Lights();
num_lights = args->uniforms->NumLights();
worldnormalX = args->uniforms->Normal().X;
worldnormalY = args->uniforms->Normal().Y;
worldnormalZ = args->uniforms->Normal().Z;
dynlightcolor = args->uniforms->DynLightColor();
// The normal vector cannot be uniform when drawing models. Calculate and use the face normal:
if (worldnormalX == 0.0f && worldnormalY == 0.0f && worldnormalZ == 0.0f)
{
float dx1 = args->v2->worldX - args->v1->worldX;
float dy1 = args->v2->worldY - args->v1->worldY;
float dz1 = args->v2->worldZ - args->v1->worldZ;
float dx2 = args->v3->worldX - args->v1->worldX;
float dy2 = args->v3->worldY - args->v1->worldY;
float dz2 = args->v3->worldZ - args->v1->worldZ;
worldnormalX = dy1 * dz2 - dz1 * dy2;
worldnormalY = dz1 * dx2 - dx1 * dz2;
worldnormalZ = dx1 * dy2 - dy1 * dx2;
float lensqr = worldnormalX * worldnormalX + worldnormalY * worldnormalY + worldnormalZ * worldnormalZ;
#ifndef NO_SSE
float rcplen = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(lensqr)));
#else
float rcplen = 1.0f / sqrt(lensqr);
#endif
worldnormalX *= rcplen;
worldnormalY *= rcplen;
worldnormalZ *= rcplen;
}
int affineOffset = x0 / 16 * 16 - x0;
float posLightW = posW + stepW * affineOffset;
posWorldX = posWorldX + stepWorldX * affineOffset;
posWorldY = posWorldY + stepWorldY * affineOffset;
posWorldZ = posWorldZ + stepWorldZ * affineOffset;
float stepLightW = stepW * 16.0f;
stepWorldX *= 16.0f;
stepWorldY *= 16.0f;
stepWorldZ *= 16.0f;
for (int x = x0 / 16; x <= x1 / 16 + 1; x++)
{
uint32_t lit_r = RPART(dynlightcolor);
uint32_t lit_g = GPART(dynlightcolor);
uint32_t lit_b = BPART(dynlightcolor);
float rcp_posW = 1.0f / posLightW;
float worldposX = posWorldX * rcp_posW;
float worldposY = posWorldY * rcp_posW;
float worldposZ = posWorldZ * rcp_posW;
for (int i = 0; i < num_lights; i++)
{
float lightposX = lights[i].x;
float lightposY = lights[i].y;
float lightposZ = lights[i].z;
float light_radius = lights[i].radius;
uint32_t light_color = lights[i].color;
bool is_attenuated = light_radius < 0.0f;
if (is_attenuated)
light_radius = -light_radius;
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
float Lx = lightposX - worldposX;
float Ly = lightposY - worldposY;
float Lz = lightposZ - worldposZ;
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
#ifdef NO_SSE
//float rcp_dist = 1.0f / sqrt(dist2);
float rcp_dist = 1.0f / (dist2 * 0.01f);
#else
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
#endif
float dist = dist2 * rcp_dist;
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
// The simple light type
float simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx *= rcp_dist;
Ly *= rcp_dist;
Lz *= rcp_dist;
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
lit_r += (RPART(light_color) * attenuation) >> 8;
lit_g += (GPART(light_color) * attenuation) >> 8;
lit_b += (BPART(light_color) * attenuation) >> 8;
}
lit_r = MIN<uint32_t>(lit_r, 255);
lit_g = MIN<uint32_t>(lit_g, 255);
lit_b = MIN<uint32_t>(lit_b, 255);
dynlights_r[x] = lit_r;
dynlights_g[x] = lit_g;
dynlights_b[x] = lit_b;
posLightW += stepLightW;
posWorldX += stepWorldX;
posWorldY += stepWorldY;
posWorldZ += stepWorldZ;
}
int offset = x0 >> 4;
int t1 = x0 & 15;
int t0 = 16 - t1;
posdynlight_r = (dynlights_r[offset] * t0 + dynlights_r[offset + 1] * t1);
posdynlight_g = (dynlights_g[offset] * t0 + dynlights_g[offset + 1] * t1);
posdynlight_b = (dynlights_b[offset] * t0 + dynlights_b[offset + 1] * t1);
for (int x = x0 / 16; x <= x1 / 16; x++)
{
dynlights_r[x] = dynlights_r[x + 1] - dynlights_r[x];
dynlights_g[x] = dynlights_g[x + 1] - dynlights_g[x];
dynlights_b[x] = dynlights_b[x + 1] - dynlights_b[x];
}
}
fixed_t fuzzscale;
int _fuzzpos;
if (ModeT::BlendOp == STYLEOP_Fuzz)
{
fuzzscale = (200 << FRACBITS) / viewheight;
@ -1226,19 +1238,13 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
uint8_t *dest = (uint8_t*)args->dest;
uint8_t *destLine = dest + args->pitch * y;
int x = x0;
while (x < x1)
for (int x = x0; x < x1; x++)
{
if (ModeT::BlendOp == STYLEOP_Fuzz)
{
using namespace swrenderer;
float rcpW = 0x01000000 / posW;
int32_t u = (int32_t)(posU * rcpW);
int32_t v = (int32_t)(posV * rcpW);
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0;
unsigned int sampleshadeout = (texPixels[texel[x]] != 0) ? 256 : 0;
int scaled_x = (x * fuzzscale) >> FRACBITS;
int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + _fuzzpos;
@ -1259,12 +1265,8 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
}
else if (ModeT::SWFlags & SWSTYLEF_Skycap)
{
float rcpW = 0x01000000 / posW;
int32_t u = (int32_t)(posU * rcpW);
int32_t v = (int32_t)(posV * rcpW);
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
int fg = texPixels[texelX * texHeight + texelY];
int32_t v = texelV[x];
int fg = texPixels[texel[x]];
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
@ -1301,12 +1303,11 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
uint8_t shadedfg;
if (OptT::Flags & SWOPT_FixedLight)
{
shadedfg = colormaps[light + fg];
shadedfg = colormaps[fixedlight + fg];
}
else
{
int lightshade = (lightpos >> 4) & 0xffffff00;
shadedfg = colormaps[lightshade + fg];
shadedfg = colormaps[lightarray[x] + fg];
}
destLine[x] = shadedfg;
@ -1320,12 +1321,7 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
}
else
{
float rcpW = 0x01000000 / posW;
int32_t u = (int32_t)(posU * rcpW);
int32_t v = (int32_t)(posV * rcpW);
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
fg = texPixels[texelX * texHeight + texelY];
fg = texPixels[texel[x]];
}
int fgalpha = 255;
@ -1345,7 +1341,7 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
if (!(ModeT::Flags & STYLEF_Alpha1))
{
fgalpha = (fgalpha * alpha) >> 8;
fgalpha = (fgalpha * actoralpha) >> 8;
}
if (ModeT::SWFlags & SWSTYLEF_Translated)
@ -1354,28 +1350,24 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
uint8_t shadedfg;
if (OptT::Flags & SWOPT_FixedLight)
{
shadedfg = colormaps[light + fg];
shadedfg = colormaps[fixedlight + fg];
}
else
{
int lightshade = (lightpos >> 4) & 0xffffff00;
shadedfg = colormaps[lightshade + fg];
shadedfg = colormaps[lightarray[x] + fg];
}
if (OptT::Flags & SWOPT_DynLights)
{
if (posdynlight_r | posdynlight_g | posdynlight_b)
uint32_t lit = dynlights[x];
if (lit & 0x00ffffff)
{
uint32_t lit_r = posdynlight_r >> 4;
uint32_t lit_g = posdynlight_g >> 4;
uint32_t lit_b = posdynlight_b >> 4;
uint32_t fgrgb = GPalette.BaseColors[fg];
uint32_t shadedfgrgb = GPalette.BaseColors[shadedfg];
uint32_t out_r = MIN(((RPART(fgrgb) * lit_r) >> 8) + RPART(shadedfgrgb), (uint32_t)255);
uint32_t out_g = MIN(((GPART(fgrgb) * lit_g) >> 8) + GPART(shadedfgrgb), (uint32_t)255);
uint32_t out_b = MIN(((BPART(fgrgb) * lit_b) >> 8) + BPART(shadedfgrgb), (uint32_t)255);
uint32_t out_r = MIN(((RPART(fgrgb) * RPART(lit)) >> 8) + RPART(shadedfgrgb), (uint32_t)255);
uint32_t out_g = MIN(((GPART(fgrgb) * GPART(lit)) >> 8) + GPART(shadedfgrgb), (uint32_t)255);
uint32_t out_b = MIN(((BPART(fgrgb) * BPART(lit)) >> 8) + BPART(shadedfgrgb), (uint32_t)255);
shadedfg = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
}
}
@ -1492,19 +1484,6 @@ void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args, PolyTr
destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
}
}
posW += stepW;
posU += stepU;
posV += stepV;
if (OptT::Flags & SWOPT_DynLights)
{
posdynlight_r += dynlights_r[x >> 4];
posdynlight_g += dynlights_g[x >> 4];
posdynlight_b += dynlights_b[x >> 4];
}
if (!(OptT::Flags & SWOPT_FixedLight))
lightpos += lightarray[x >> 4];
x++;
}
}