- replace softpoly block drawers with span drawers and make them use blending rules directly from render styles

- add dynlights to softpoly pal mode
This commit is contained in:
Magnus Norddahl 2018-05-29 23:32:57 +02:00
parent ac207cce85
commit 4e6226fc2d
12 changed files with 626 additions and 1383 deletions

View file

@ -52,7 +52,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, FRenderStyle style)
mTexture = texture; mTexture = texture;
mTextureWidth = texture->GetWidth(); mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight(); mTextureHeight = texture->GetHeight();
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
mTexturePixels = (const uint8_t *)texture->GetPixelsBgra(); mTexturePixels = (const uint8_t *)texture->GetPixelsBgra();
else else
mTexturePixels = texture->GetPixels(style); mTexturePixels = texture->GetPixels(style);
@ -67,7 +67,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRender
FRemapTable *table = TranslationToTable(translationID); FRemapTable *table = TranslationToTable(translationID);
if (table != nullptr && !table->Inactive) if (table != nullptr && !table->Inactive)
{ {
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
mTranslation = (uint8_t*)table->Palette; mTranslation = (uint8_t*)table->Palette;
else else
mTranslation = table->Remap; mTranslation = table->Remap;
@ -121,7 +121,7 @@ void PolyDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel, dou
void PolyDrawArgs::SetColor(uint32_t bgra, uint8_t palindex) void PolyDrawArgs::SetColor(uint32_t bgra, uint8_t palindex)
{ {
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
{ {
mColor = bgra; mColor = bgra;
} }
@ -217,7 +217,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, FRenderStyle style)
mTexture = texture; mTexture = texture;
mTextureWidth = texture->GetWidth(); mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight(); mTextureHeight = texture->GetHeight();
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
mTexturePixels = (const uint8_t *)texture->GetPixelsBgra(); mTexturePixels = (const uint8_t *)texture->GetPixelsBgra();
else else
mTexturePixels = texture->GetPixels(style); mTexturePixels = texture->GetPixels(style);
@ -231,7 +231,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRender
FRemapTable *table = TranslationToTable(translationID); FRemapTable *table = TranslationToTable(translationID);
if (table != nullptr && !table->Inactive) if (table != nullptr && !table->Inactive)
{ {
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
mTranslation = (uint8_t*)table->Palette; mTranslation = (uint8_t*)table->Palette;
else else
mTranslation = table->Remap; mTranslation = table->Remap;
@ -275,7 +275,7 @@ void RectDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel)
void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex) void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex)
{ {
if (PolyRenderer::Instance()->RenderTarget->IsBgra()) if (PolyTriangleDrawer::IsBgra())
{ {
mColor = bgra; mColor = bgra;
} }

View file

@ -75,7 +75,7 @@ public:
void SetWriteColor(bool enable) { mWriteColor = enable; } void SetWriteColor(bool enable) { mWriteColor = enable; }
void SetWriteStencil(bool enable, uint8_t stencilWriteValue = 0) { mWriteStencil = enable; mStencilWriteValue = stencilWriteValue; } void SetWriteStencil(bool enable, uint8_t stencilWriteValue = 0) { mWriteStencil = enable; mStencilWriteValue = stencilWriteValue; }
void SetWriteDepth(bool enable) { mWriteDepth = enable; } void SetWriteDepth(bool enable) { mWriteDepth = enable; }
void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(alpha * 256.0 + 0.5); } void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mAlpha = (uint32_t)(alpha * 256.0 + 0.5); }
void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright);
void SetColor(uint32_t bgra, uint8_t palindex); void SetColor(uint32_t bgra, uint8_t palindex);
void SetLights(PolyLight *lights, int numLights) { mLights = lights; mNumLights = numLights; } void SetLights(PolyLight *lights, int numLights) { mLights = lights; mNumLights = numLights; }
@ -107,8 +107,7 @@ public:
TriBlendMode BlendMode() const { return mBlendMode; } TriBlendMode BlendMode() const { return mBlendMode; }
uint32_t Color() const { return mColor; } uint32_t Color() const { return mColor; }
uint32_t SrcAlpha() const { return mSrcAlpha; } uint32_t Alpha() const { return mAlpha; }
uint32_t DestAlpha() const { return 256 - mSrcAlpha; }
float GlobVis() const { return mGlobVis; } float GlobVis() const { return mGlobVis; }
uint32_t Light() const { return mLight; } uint32_t Light() const { return mLight; }
@ -155,7 +154,7 @@ private:
TriBlendMode mBlendMode = TriBlendMode::Fill; TriBlendMode mBlendMode = TriBlendMode::Fill;
uint32_t mLight = 0; uint32_t mLight = 0;
uint32_t mColor = 0; uint32_t mColor = 0;
uint32_t mSrcAlpha = 0; uint32_t mAlpha = 0;
uint16_t mLightAlpha = 0; uint16_t mLightAlpha = 0;
uint16_t mLightRed = 0; uint16_t mLightRed = 0;
uint16_t mLightGreen = 0; uint16_t mLightGreen = 0;

View file

@ -328,453 +328,6 @@ namespace TriScreenDrawerModes
} }
} }
// Block drawer for screen-space triangles writing 32-bit pixels (dest is
// treated as uint32_t). BlendT is forwarded to Blend32 and SamplerT to
// Sample32/SampleShade32. Each Execute call processes one block of 8 rows by
// 8 pixels whose coverage is given by two 32-bit masks.
template<typename BlendT, typename SamplerT>
class TriScreenDrawer32
{
public:
// Selects the DrawBlock<ShadeModeT, FilterModeT> instantiation to run.
//   x, y         - block origin; used as the pixel offset into args->dest.
//   mask0, mask1 - coverage bits for rows 0-3 and rows 4-7 respectively.
//                  Bit 31 is the first pixel of the first row; the mask is
//                  shifted left by one per pixel.
//   args         - triangle setup (vertex v1, gradients, dest, pitch, uniforms).
// Only the Texture sampler may use the linear filter; Fuzz is drawn with
// NoShade; every other sampler uses the nearest filter.
static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
bool is_simple_shade = args->uniforms->SimpleShade();
if (SamplerT::Mode == (int)Samplers::Texture)
{
bool is_nearest_filter = args->uniforms->NearestFilter();
if (is_simple_shade)
{
if (is_nearest_filter)
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
else
DrawBlock<SimpleShade, LinearFilter>(x, y, mask0, mask1, args);
}
else
{
if (is_nearest_filter)
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
else
DrawBlock<AdvancedShade, LinearFilter>(x, y, mask0, mask1, args);
}
}
else if (SamplerT::Mode == (int)Samplers::Fuzz)
{
DrawBlock<NoShade, NearestFilter>(x, y, mask0, mask1, args);
}
else // no linear filtering for translated, shaded, stencil, fill or skycap
{
if (is_simple_shade)
{
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
}
else
{
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
}
}
}
private:
// Shades and blends one 8x8 block starting at (destX, destY).
// When both masks are 0xffffffff every pixel is written without testing the
// mask; otherwise rows 0-3 are gated by mask0 and rows 4-7 by mask1.
// U/V, the light level and the dynamic light are evaluated at the left edge
// of each row and 8 pixels to its right, then stepped linearly per pixel.
template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
bool is_fixed_light = args->uniforms->FixedLight();
// With a fixed light the mask is 0, which replaces the per-row computed
// lightpos with (light << 8) and zeroes lightstep further down.
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
uint32_t srcalpha = args->uniforms->SrcAlpha();
uint32_t destalpha = args->uniforms->DestAlpha();
auto lights = args->uniforms->Lights();
auto num_lights = args->uniforms->NumLights();
FVector3 worldnormal = args->uniforms->Normal();
uint32_t dynlightcolor = args->uniforms->DynLightColor();
// Calculate gradients
// blockPosY holds W and the W-premultiplied U, V and world position at the
// block origin, extrapolated from vertex v1 using the X and Y gradients.
const ShadedTriVertex &v1 = *args->v1;
ScreenTriangleStepVariables gradientX = args->gradientX;
ScreenTriangleStepVariables gradientY = args->gradientY;
ScreenTriangleStepVariables blockPosY;
blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
blockPosY.WorldX = v1.worldX * v1.w + gradientX.WorldX * (destX - v1.x) + gradientY.WorldX * (destY - v1.y);
blockPosY.WorldY = v1.worldY * v1.w + gradientX.WorldY * (destX - v1.x) + gradientY.WorldY * (destY - v1.y);
blockPosY.WorldZ = v1.worldZ * v1.w + gradientX.WorldZ * (destX - v1.x) + gradientY.WorldZ * (destY - v1.y);
// Scale the local copy of the X gradient by the block width so that one
// addition moves 8 pixels to the right.
gradientX.W *= 8.0f;
gradientX.U *= 8.0f;
gradientX.V *= 8.0f;
gradientX.WorldX *= 8.0f;
gradientX.WorldY *= 8.0f;
gradientX.WorldZ *= 8.0f;
// Output
// pitch is applied in uint32_t elements, not bytes.
uint32_t * RESTRICT destOrg = (uint32_t*)args->dest;
int pitch = args->pitch;
uint32_t *dest = destOrg + destX + destY * pitch;
// Light
uint32_t light = args->uniforms->Light();
float shade = 2.0f - (light + 12.0f) / 128.0f;
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
light += (light >> 7); // 255 -> 256
// Sampling stuff
uint32_t color = args->uniforms->Color();
const uint32_t * RESTRICT translation = (const uint32_t *)args->uniforms->Translation();
const uint32_t * RESTRICT texPixels = (const uint32_t *)args->uniforms->TexturePixels();
uint32_t texWidth = args->uniforms->TextureWidth();
uint32_t texHeight = args->uniforms->TextureHeight();
// oneU/oneV are only forwarded to Sample32.
// NOTE(review): presumably the fixed-point size of one texel used by the
// linear filter - confirm against Sample32. The Fill sampler gets 0 and so
// avoids dividing by the texture dimensions.
uint32_t oneU, oneV;
if (SamplerT::Mode != (int)Samplers::Fill)
{
oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1;
oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1;
}
else
{
oneU = 0;
oneV = 0;
}
// Shade constants
// Only the Advanced shade mode reads the fade/light/desaturate uniforms;
// the other modes pass zeros to Shade32.
int inv_desaturate;
BgraColor shade_fade, shade_light;
int desaturate;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
shade_fade.r = args->uniforms->ShadeFadeRed();
shade_fade.g = args->uniforms->ShadeFadeGreen();
shade_fade.b = args->uniforms->ShadeFadeBlue();
shade_light.r = args->uniforms->ShadeLightRed();
shade_light.g = args->uniforms->ShadeLightGreen();
shade_light.b = args->uniforms->ShadeLightBlue();
desaturate = args->uniforms->ShadeDesaturate();
inv_desaturate = 256 - desaturate;
}
else
{
inv_desaturate = 0;
shade_fade.r = 0;
shade_fade.g = 0;
shade_fade.b = 0;
shade_light.r = 0;
shade_light.g = 0;
shade_light.b = 0;
desaturate = 0;
}
// Fully covered block: all 64 pixels are written, no per-pixel mask test.
if (mask0 == 0xffffffff && mask1 == 0xffffffff)
{
for (int y = 0; y < 8; y++)
{
// Left edge of the row: divide the premultiplied U/V by W, scaled by
// 0x01000000 into integer texture coordinates.
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W;
BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
// Same quantities 8 pixels to the right; the per-pixel steps are the
// differences divided by 8.
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W;
BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
BgraColor dynlightstep;
dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3;
dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3;
dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3;
for (int ix = 0; ix < 8; ix++)
{
// Load bgcolor
BgraColor bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = dest[ix];
else
bgcolor = 0;
// Sample fgcolor
// The FogBoundary sampler is fed the current destination pixel as its color.
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix];
unsigned int ifgcolor = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
unsigned int ifgshade = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + ix, destY + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
BgraColor mlight;
mlight.r = lightpos0;
mlight.g = lightpos0;
mlight.b = lightpos0;
BgraColor shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
uint32_t inv_light = 256 - lightpos0;
shade_fade_lit.r = shade_fade.r * inv_light;
shade_fade_lit.g = shade_fade.g * inv_light;
shade_fade_lit.b = shade_fade.b * inv_light;
}
else
{
shade_fade_lit.r = 0;
shade_fade_lit.g = 0;
shade_fade_lit.b = 0;
}
// Shade and blend
BgraColor fgcolor = Shade32<ShadeModeT>(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
BgraColor outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha);
// Store result
dest[ix] = outcolor;
// Advance the interpolated dynamic light, clamping each channel at 0.
dynlight.r = MAX<int32_t>(dynlight.r + dynlightstep.r, 0);
dynlight.g = MAX<int32_t>(dynlight.g + dynlightstep.g, 0);
dynlight.b = MAX<int32_t>(dynlight.b + dynlightstep.b, 0);
}
// Step everything down to the next row.
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
}
else
{
// Partially covered block: same per-row setup as above, but destination
// reads and writes are gated by the top bit of the active mask.
// mask0 loop:
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W;
BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W;
BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
BgraColor dynlightstep;
dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3;
dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3;
dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3;
for (int x = 0; x < 8; x++)
{
// Load bgcolor
// NOTE(review): for a non-opaque blend on an uncovered pixel bgcolor is
// never assigned here yet is still passed to Blend32 below; the result is
// discarded by the masked store. Whether this reads an uninitialized value
// depends on BgraColor's default constructor - confirm.
BgraColor bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
{
if (mask0 & (1 << 31)) bgcolor = dest[x];
}
else
bgcolor = 0;
// Sample fgcolor
if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask0 & (1 << 31))) color = dest[x];
unsigned int ifgcolor = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
unsigned int ifgshade = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
BgraColor mlight;
mlight.r = lightpos0;
mlight.g = lightpos0;
mlight.b = lightpos0;
BgraColor shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
uint32_t inv_light = 256 - lightpos0;
shade_fade_lit.r = shade_fade.r * inv_light;
shade_fade_lit.g = shade_fade.g * inv_light;
shade_fade_lit.b = shade_fade.b * inv_light;
}
else
{
shade_fade_lit.r = 0;
shade_fade_lit.g = 0;
shade_fade_lit.b = 0;
}
// Shade and blend
BgraColor fgcolor = Shade32<ShadeModeT>(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
BgraColor outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha);
// Store result
// Write only covered pixels, then move the next pixel's bit into bit 31.
if (mask0 & (1 << 31)) dest[x] = outcolor;
mask0 <<= 1;
dynlight.r = MAX<int32_t>(dynlight.r + dynlightstep.r, 0);
dynlight.g = MAX<int32_t>(dynlight.g + dynlightstep.g, 0);
dynlight.b = MAX<int32_t>(dynlight.b + dynlightstep.b, 0);
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
// mask1 loop:
// Rows 4-7 of the block; blockPosY and dest carry on from the mask0 loop,
// and the screen row passed to SampleShade32 is offset by 4 accordingly.
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W;
BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W;
BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
BgraColor dynlightstep;
dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3;
dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3;
dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3;
for (int x = 0; x < 8; x++)
{
// Load bgcolor
// NOTE(review): same unassigned-bgcolor caveat as in the mask0 loop.
BgraColor bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
{
if (mask1 & (1 << 31)) bgcolor = dest[x];
}
else
bgcolor = 0;
// Sample fgcolor
if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 31))) color = dest[x];
unsigned int ifgcolor = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
unsigned int ifgshade = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + 4 + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
BgraColor mlight;
mlight.r = lightpos0;
mlight.g = lightpos0;
mlight.b = lightpos0;
BgraColor shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
uint32_t inv_light = 256 - lightpos0;
shade_fade_lit.r = shade_fade.r * inv_light;
shade_fade_lit.g = shade_fade.g * inv_light;
shade_fade_lit.b = shade_fade.b * inv_light;
}
else
{
shade_fade_lit.r = 0;
shade_fade_lit.g = 0;
shade_fade_lit.b = 0;
}
// Shade and blend
BgraColor fgcolor = Shade32<ShadeModeT>(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
BgraColor outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha);
// Store result
if (mask1 & (1 << 31)) dest[x] = outcolor;
mask1 <<= 1;
dynlight.r = MAX<int32_t>(dynlight.r + dynlightstep.r, 0);
dynlight.g = MAX<int32_t>(dynlight.g + dynlightstep.g, 0);
dynlight.b = MAX<int32_t>(dynlight.b + dynlightstep.b, 0);
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
}
}
};
template<typename BlendT, typename SamplerT> template<typename BlendT, typename SamplerT>
class RectScreenDrawer32 class RectScreenDrawer32
{ {

View file

@ -347,461 +347,6 @@ namespace TriScreenDrawerModes
} }
} }
template<typename BlendT, typename SamplerT>
class TriScreenDrawer32
{
public:
static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
bool is_simple_shade = args->uniforms->SimpleShade();
if (SamplerT::Mode == (int)Samplers::Texture)
{
bool is_nearest_filter = args->uniforms->NearestFilter();
if (is_simple_shade)
{
if (is_nearest_filter)
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
else
DrawBlock<SimpleShade, LinearFilter>(x, y, mask0, mask1, args);
}
else
{
if (is_nearest_filter)
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
else
DrawBlock<AdvancedShade, LinearFilter>(x, y, mask0, mask1, args);
}
}
else if (SamplerT::Mode == (int)Samplers::Fuzz)
{
DrawBlock<NoShade, NearestFilter>(x, y, mask0, mask1, args);
}
else // no linear filtering for translated, shaded, stencil, fill or skycap
{
if (is_simple_shade)
{
DrawBlock<SimpleShade, NearestFilter>(x, y, mask0, mask1, args);
}
else
{
DrawBlock<AdvancedShade, NearestFilter>(x, y, mask0, mask1, args);
}
}
}
private:
template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
bool is_fixed_light = args->uniforms->FixedLight();
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
uint32_t srcalpha = args->uniforms->SrcAlpha();
uint32_t destalpha = args->uniforms->DestAlpha();
auto lights = args->uniforms->Lights();
auto num_lights = args->uniforms->NumLights();
__m128 worldnormal = _mm_setr_ps(args->uniforms->Normal().X, args->uniforms->Normal().Y, args->uniforms->Normal().Z, 0.0f);
uint32_t dynlightcolor = args->uniforms->DynLightColor();
// Calculate gradients
const ShadedTriVertex &v1 = *args->v1;
ScreenTriangleStepVariables gradientX = args->gradientX;
ScreenTriangleStepVariables gradientY = args->gradientY;
ScreenTriangleStepVariables blockPosY;
blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
blockPosY.WorldX = v1.worldX * v1.w + gradientX.WorldX * (destX - v1.x) + gradientY.WorldX * (destY - v1.y);
blockPosY.WorldY = v1.worldY * v1.w + gradientX.WorldY * (destX - v1.x) + gradientY.WorldY * (destY - v1.y);
blockPosY.WorldZ = v1.worldZ * v1.w + gradientX.WorldZ * (destX - v1.x) + gradientY.WorldZ * (destY - v1.y);
gradientX.W *= 8.0f;
gradientX.U *= 8.0f;
gradientX.V *= 8.0f;
gradientX.WorldX *= 8.0f;
gradientX.WorldY *= 8.0f;
gradientX.WorldZ *= 8.0f;
// Output
uint32_t * RESTRICT destOrg = (uint32_t*)args->dest;
int pitch = args->pitch;
uint32_t *dest = destOrg + destX + destY * pitch;
// Light
uint32_t light = args->uniforms->Light();
float shade = 2.0f - (light + 12.0f) / 128.0f;
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
light += (light >> 7); // 255 -> 256
// Sampling stuff
uint32_t color = args->uniforms->Color();
const uint32_t * RESTRICT translation = (const uint32_t *)args->uniforms->Translation();
const uint32_t * RESTRICT texPixels = (const uint32_t *)args->uniforms->TexturePixels();
uint32_t texWidth = args->uniforms->TextureWidth();
uint32_t texHeight = args->uniforms->TextureHeight();
uint32_t oneU, oneV;
if (SamplerT::Mode != (int)Samplers::Fill)
{
oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1;
oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1;
}
else
{
oneU = 0;
oneV = 0;
}
// Shade constants
__m128i inv_desaturate, shade_fade, shade_light;
int desaturate;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
inv_desaturate = _mm_setr_epi16(256, 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256, 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate());
shade_fade = _mm_set_epi16(args->uniforms->ShadeFadeAlpha(), args->uniforms->ShadeFadeRed(), args->uniforms->ShadeFadeGreen(), args->uniforms->ShadeFadeBlue(), args->uniforms->ShadeFadeAlpha(), args->uniforms->ShadeFadeRed(), args->uniforms->ShadeFadeGreen(), args->uniforms->ShadeFadeBlue());
shade_light = _mm_set_epi16(args->uniforms->ShadeLightAlpha(), args->uniforms->ShadeLightRed(), args->uniforms->ShadeLightGreen(), args->uniforms->ShadeLightBlue(), args->uniforms->ShadeLightAlpha(), args->uniforms->ShadeLightRed(), args->uniforms->ShadeLightGreen(), args->uniforms->ShadeLightBlue());
desaturate = args->uniforms->ShadeDesaturate();
}
else
{
inv_desaturate = _mm_setzero_si128();
shade_fade = _mm_setzero_si128();
shade_fade = _mm_setzero_si128();
shade_light = _mm_setzero_si128();
desaturate = 0;
}
if (mask0 == 0xffffffff && mask1 == 0xffffffff)
{
for (int y = 0; y < 8; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff,0xffff,0,0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
for (int ix = 0; ix < 4; ix++)
{
// Load bgcolor
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + ix * 2)), _mm_setzero_si128());
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix * 2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + ix * 2, destY + y);
posU += stepU;
posV += stepV;
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix * 2 + 1];
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + ix * 2 + 1, destY + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
int lightpos1 = lightpos >> 8;
lightpos += lightstep;
__m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0);
__m128i shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
}
else
{
shade_fade_lit = _mm_setzero_si128();
}
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
}
else
{
// mask0 loop:
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
for (int x = 0; x < 4; x++)
{
// Load bgcolor
uint32_t desttmp[2];
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
{
if (mask0 & (1 << 31)) desttmp[0] = dest[x * 2];
if (mask0 & (1 << 30)) desttmp[1] = dest[x * 2 + 1];
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128());
}
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[x * 2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x * 2, destY + y);
posU += stepU;
posV += stepV;
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[x * 2 + 1];
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x * 2 + 1, destY + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
int lightpos1 = lightpos >> 8;
lightpos += lightstep;
__m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0);
__m128i shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
}
else
{
shade_fade_lit = _mm_setzero_si128();
}
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
if (mask0 & (1 << 31)) dest[x * 2] = desttmp[0];
if (mask0 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
mask0 <<= 2;
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
// mask1 loop:
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
__m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W);
__m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW);
__m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
blockPosX.WorldX += gradientX.WorldX;
blockPosX.WorldY += gradientX.WorldY;
blockPosX.WorldZ += gradientX.WorldZ;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
mrcpW = _mm_set1_ps(1.0f / blockPosX.W);
worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW);
__m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor);
__m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3);
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128());
dynlightstep = _mm_slli_epi16(dynlightstep, 1);
for (int x = 0; x < 4; x++)
{
// Load bgcolor
uint32_t desttmp[2];
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
{
if (mask1 & (1 << 31)) desttmp[0] = dest[x * 2];
if (mask1 & (1 << 30)) desttmp[1] = dest[x * 2 + 1];
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128());
}
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 31))) color = dest[x * 2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x * 2, destY + 4 + y);
posU += stepU;
posV += stepV;
if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 30))) color = dest[x * 2 + 1];
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x * 2 + 1, destY + 4 + y);
posU += stepU;
posV += stepV;
// Setup light
int lightpos0 = lightpos >> 8;
lightpos += lightstep;
int lightpos1 = lightpos >> 8;
lightpos += lightstep;
__m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0);
__m128i shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
}
else
{
shade_fade_lit = _mm_setzero_si128();
}
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
if (mask1 & (1 << 31)) dest[x * 2] = desttmp[0];
if (mask1 & (1 << 30)) dest[x * 2 + 1] = desttmp[1];
dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128());
mask1 <<= 2;
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
blockPosY.WorldX += gradientY.WorldX;
blockPosY.WorldY += gradientY.WorldY;
blockPosY.WorldZ += gradientY.WorldZ;
dest += pitch;
}
}
}
};
template<typename BlendT, typename SamplerT> template<typename BlendT, typename SamplerT>
class RectScreenDrawer32 class RectScreenDrawer32
{ {

View file

@ -225,206 +225,6 @@ namespace TriScreenDrawerModes
} }
} }
template<typename BlendT, typename SamplerT>
class TriScreenDrawer8
{
public:
static void Execute(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
bool is_fixed_light = args->uniforms->FixedLight();
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff;
auto colormaps = args->uniforms->BaseColormap();
uint32_t srcalpha = args->uniforms->SrcAlpha();
uint32_t destalpha = args->uniforms->DestAlpha();
// Calculate gradients
const ShadedTriVertex &v1 = *args->v1;
ScreenTriangleStepVariables gradientX = args->gradientX;
ScreenTriangleStepVariables gradientY = args->gradientY;
ScreenTriangleStepVariables blockPosY;
blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y);
blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y);
blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y);
gradientX.W *= 8.0f;
gradientX.U *= 8.0f;
gradientX.V *= 8.0f;
// Output
uint8_t * RESTRICT destOrg = args->dest;
int pitch = args->pitch;
uint8_t *dest = destOrg + destX + destY * pitch;
// Light
uint32_t light = args->uniforms->Light();
float shade = 2.0f - (light + 12.0f) / 128.0f;
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
light += light >> 7; // 255 -> 256
// Sampling stuff
uint32_t color = args->uniforms->Color();
const uint8_t * RESTRICT translation = args->uniforms->Translation();
const uint8_t * RESTRICT texPixels = args->uniforms->TexturePixels();
uint32_t texWidth = args->uniforms->TextureWidth();
uint32_t texHeight = args->uniforms->TextureHeight();
if (mask0 == 0xffffffff && mask1 == 0xffffffff)
{
for (int y = 0; y < 8; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
for (int ix = 0; ix < 8; ix++)
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[ix];
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor;
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + ix, destY + y);
if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256;
dest[ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU;
posV += stepV;
lightpos += lightstep;
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
dest += pitch;
}
}
else
{
// mask0 loop:
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
for (int x = 0; x < 8; x++)
{
if (mask0 & (1 << 31))
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x];
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor;
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + y);
if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256;
dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
}
posU += stepU;
posV += stepV;
lightpos += lightstep;
mask0 <<= 1;
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
dest += pitch;
}
// mask1 loop:
for (int y = 0; y < 4; y++)
{
float rcpW = 0x01000000 / blockPosY.W;
int32_t posU = (int32_t)(blockPosY.U * rcpW);
int32_t posV = (int32_t)(blockPosY.V * rcpW);
fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask);
ScreenTriangleStepVariables blockPosX = blockPosY;
blockPosX.W += gradientX.W;
blockPosX.U += gradientX.U;
blockPosX.V += gradientX.V;
rcpW = 0x01000000 / blockPosX.W;
int32_t nextU = (int32_t)(blockPosX.U * rcpW);
int32_t nextV = (int32_t)(blockPosX.V * rcpW);
int32_t stepU = (nextU - posU) / 8;
int32_t stepV = (nextV - posV) / 8;
fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightstep = (lightnext - lightpos) / 8;
lightstep = lightstep & lightmask;
for (int x = 0; x < 8; x++)
{
if (mask1 & (1 << 31))
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x];
if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor;
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + 4 + y);
if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256;
dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
}
posU += stepU;
posV += stepV;
lightpos += lightstep;
mask1 <<= 1;
}
blockPosY.W += gradientY.W;
blockPosY.U += gradientY.U;
blockPosY.V += gradientY.V;
dest += pitch;
}
}
}
};
template<typename BlendT, typename SamplerT> template<typename BlendT, typename SamplerT>
class RectScreenDrawer8 class RectScreenDrawer8
{ {

View file

@ -39,12 +39,19 @@
#include "screen_triangle.h" #include "screen_triangle.h"
#include "x86.h" #include "x86.h"
// Pixel format flag of the canvas most recently passed to PolyTriangleDrawer::SetViewport
// (copied from canvas->IsBgra() there); read back through PolyTriangleDrawer::IsBgra().
// Starts out false until SetViewport has been called.
static bool isBgraRenderTarget = false;
void PolyTriangleDrawer::ClearBuffers(DCanvas *canvas) void PolyTriangleDrawer::ClearBuffers(DCanvas *canvas)
{ {
PolyStencilBuffer::Instance()->Clear(canvas->GetWidth(), canvas->GetHeight(), 0); PolyStencilBuffer::Instance()->Clear(canvas->GetWidth(), canvas->GetHeight(), 0);
PolyZBuffer::Instance()->Resize(canvas->GetPitch(), canvas->GetHeight()); PolyZBuffer::Instance()->Resize(canvas->GetPitch(), canvas->GetHeight());
} }
// Returns the BGRA flag recorded by the last SetViewport call (false if SetViewport
// has not run yet). Lets draw-argument setup choose between bgra and palette data
// without reaching into the renderer's current render target.
bool PolyTriangleDrawer::IsBgra()
{
return isBgraRenderTarget;
}
void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers) void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers)
{ {
uint8_t *dest = (uint8_t*)canvas->GetPixels(); uint8_t *dest = (uint8_t*)canvas->GetPixels();
@ -52,6 +59,7 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x,
int dest_height = canvas->GetHeight(); int dest_height = canvas->GetHeight();
int dest_pitch = canvas->GetPitch(); int dest_pitch = canvas->GetPitch();
bool dest_bgra = canvas->IsBgra(); bool dest_bgra = canvas->IsBgra();
isBgraRenderTarget = dest_bgra;
int offsetx = clamp(x, 0, dest_width); int offsetx = clamp(x, 0, dest_width);
int offsety = clamp(y, 0, dest_height); int offsety = clamp(y, 0, dest_height);

View file

@ -38,6 +38,8 @@ public:
static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided); static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided);
static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable); static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable);
static void SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip); static void SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip);
static bool IsBgra();
}; };
class PolyTriangleThreadData class PolyTriangleThreadData

View file

@ -307,7 +307,7 @@ void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1)
bool writeDepth = args->uniforms->WriteDepth(); bool writeDepth = args->uniforms->WriteDepth();
int bmode = (int)args->uniforms->BlendMode(); int bmode = (int)args->uniforms->BlendMode();
auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode]; auto drawFunc = args->destBgra ? ScreenTriangle::SpanDrawers32[bmode] : ScreenTriangle::SpanDrawers8[bmode];
// Loop through blocks // Loop through blocks
for (int y = start_miny; y < y1; y += q * num_cores) for (int y = start_miny; y < y1; y += q * num_cores)
@ -345,7 +345,66 @@ void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1)
} }
if (writeColor) if (writeColor)
drawFunc(X, Y, Mask0, Mask1, args); {
if (Mask0 == 0xffffffff)
{
drawFunc(Y, X, X + 8, args);
drawFunc(Y + 1, X, X + 8, args);
drawFunc(Y + 2, X, X + 8, args);
drawFunc(Y + 3, X, X + 8, args);
}
else if (Mask0 != 0)
{
uint32_t mask = Mask0;
for (int j = 0; j < 4; j++)
{
int start = 0;
int i;
for (i = 0; i < 8; i++)
{
if (!(mask & 0x80000000))
{
if (i > start)
drawFunc(Y + j, X + start, X + i, args);
start = i + 1;
}
mask <<= 1;
}
if (i > start)
drawFunc(Y + j, X + start, X + i, args);
}
}
if (Mask1 == 0xffffffff)
{
drawFunc(Y + 4, X, X + 8, args);
drawFunc(Y + 5, X, X + 8, args);
drawFunc(Y + 6, X, X + 8, args);
drawFunc(Y + 7, X, X + 8, args);
}
else if (Mask1 != 0)
{
uint32_t mask = Mask1;
for (int j = 4; j < 8; j++)
{
int start = 0;
int i;
for (i = 0; i < 8; i++)
{
if (!(mask & 0x80000000))
{
if (i > start)
drawFunc(Y + j, X + start, X + i, args);
start = i + 1;
}
mask <<= 1;
}
if (i > start)
drawFunc(Y + j, X + start, X + i, args);
}
}
}
if (writeStencil) if (writeStencil)
StencilWrite(); StencilWrite();
if (writeDepth) if (writeDepth)
@ -1249,210 +1308,102 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT
} }
} }
template<typename ModeT> template<typename ModeT, typename OptT>
void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{ {
using namespace TriScreenDrawerModes; using namespace TriScreenDrawerModes;
float v1X = args->v1->x; float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
float v1Y = args->v1->y; float startX, startY;
float v1W = args->v1->w; float stepXW, stepXU, stepXV, stepWorldX, stepWorldY, stepWorldZ;
float v1U = args->v1->u * v1W; float posXW, posXU, posXV, posWorldX, posWorldY, posWorldZ;
float v1V = args->v1->v * v1W;
float stepXW = args->gradientX.W;
float stepXU = args->gradientX.U;
float stepXV = args->gradientX.V;
float startX = x0 + (0.5f - v1X);
float startY = y + (0.5f - v1Y);
float posXW = v1W + stepXW * startX + args->gradientY.W * startY;
float posXU = v1U + stepXU * startX + args->gradientY.U * startY;
float posXV = v1V + stepXV * startX + args->gradientY.V * startY;
const uint32_t *texPixels = (const uint32_t*)args->uniforms->TexturePixels(); PolyLight *lights;
const uint32_t *translation = (const uint32_t*)args->uniforms->Translation(); int num_lights;
int texWidth = args->uniforms->TextureWidth(); float worldnormalX, worldnormalY, worldnormalZ;
int texHeight = args->uniforms->TextureHeight(); uint32_t dynlightcolor;
const uint32_t *texPixels, *translation;
int texWidth, texHeight;
uint32_t fillcolor;
int alpha;
uint32_t light;
fixed_t shade, lightpos, lightstep;
uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate;
int fillcolor = args->uniforms->Color(); v1X = args->v1->x;
int alpha = args->uniforms->SrcAlpha(); v1Y = args->v1->y;
v1W = args->v1->w;
v1U = args->v1->u * v1W;
v1V = args->v1->v * v1W;
startX = x0 + (0.5f - v1X);
startY = y + (0.5f - v1Y);
stepXW = args->gradientX.W;
stepXU = args->gradientX.U;
stepXV = args->gradientX.V;
posXW = v1W + stepXW * startX + args->gradientY.W * startY;
posXU = v1U + stepXU * startX + args->gradientY.U * startY;
posXV = v1V + stepXV * startX + args->gradientY.V * startY;
bool is_fixed_light = args->uniforms->FixedLight(); texPixels = (const uint32_t*)args->uniforms->TexturePixels();
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; translation = (const uint32_t*)args->uniforms->Translation();
uint32_t light = args->uniforms->Light(); texWidth = args->uniforms->TextureWidth();
float shade = 2.0f - (light + 12.0f) / 128.0f; texHeight = args->uniforms->TextureHeight();
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); fillcolor = args->uniforms->Color();
alpha = args->uniforms->Alpha();
light = args->uniforms->Light();
if (OptT::Flags & SWOPT_FixedLight)
{
light += light >> 7; // 255 -> 256 light += light >> 7; // 255 -> 256
}
else
{
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
lightpos = (fixed_t)(globVis * posXW * (float)FRACUNIT);
lightstep = (fixed_t)(globVis * stepXW * (float)FRACUNIT);
}
if (OptT::Flags & SWOPT_DynLights)
{
v1WorldX = args->v1->worldX * v1W;
v1WorldY = args->v1->worldY * v1W;
v1WorldZ = args->v1->worldZ * v1W;
stepWorldX = args->gradientX.WorldX;
stepWorldY = args->gradientX.WorldY;
stepWorldZ = args->gradientX.WorldZ;
posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY;
posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY;
posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY;
lights = args->uniforms->Lights();
num_lights = args->uniforms->NumLights();
worldnormalX = args->uniforms->Normal().X;
worldnormalY = args->uniforms->Normal().Y;
worldnormalZ = args->uniforms->Normal().Z;
dynlightcolor = args->uniforms->DynLightColor();
}
if (OptT::Flags & SWOPT_ColoredFog)
{
shade_fade_r = args->uniforms->ShadeFadeRed();
shade_fade_g = args->uniforms->ShadeFadeGreen();
shade_fade_b = args->uniforms->ShadeFadeBlue();
shade_light_r = args->uniforms->ShadeLightRed();
shade_light_g = args->uniforms->ShadeLightGreen();
shade_light_b = args->uniforms->ShadeLightBlue();
desaturate = args->uniforms->ShadeDesaturate();
inv_desaturate = 256 - desaturate;
}
uint32_t *dest = (uint32_t*)args->dest; uint32_t *dest = (uint32_t*)args->dest;
uint32_t *destLine = dest + args->pitch * y; uint32_t *destLine = dest + args->pitch * y;
int x = x0; int x = x0;
#ifndef NO_SSE
__m128i mfillcolor = _mm_set1_epi32(fillcolor);
__m128i mcapcolor = _mm_unpacklo_epi8(mfillcolor, _mm_setzero_si128());
__m128i malpha = _mm_set1_epi32(alpha);
int sseEnd = x0 + ((x1 - x0) & ~3);
while (x < sseEnd)
{
__m128i fg;
if (ModeT::SWFlags & SWSTYLEF_Fill)
{
fg = mfillcolor;
}
else if (ModeT::SWFlags & SWSTYLEF_FogBoundary)
{
fg = _mm_loadl_epi64((const __m128i*)(destLine + x));
}
else
{
float rcpW0 = 0x01000000 / posXW;
float rcpW1 = 0x01000000 / (posXW + stepXW);
int32_t u0 = (int32_t)(posXU * rcpW0);
int32_t u1 = (int32_t)((posXU + stepXU) * rcpW1);
int32_t v0 = (int32_t)(posXV * rcpW0);
int32_t v1 = (int32_t)((posXV + stepXV) * rcpW1);
uint32_t texelX0 = ((((uint32_t)u0 << 8) >> 16) * texWidth) >> 16;
uint32_t texelX1 = ((((uint32_t)u1 << 8) >> 16) * texWidth) >> 16;
uint32_t texelY0 = ((((uint32_t)v0 << 8) >> 16) * texHeight) >> 16;
uint32_t texelY1 = ((((uint32_t)v1 << 8) >> 16) * texHeight) >> 16;
if (ModeT::SWFlags & SWSTYLEF_Translated)
{
uint32_t fg0 = translation[((const uint8_t*)texPixels)[texelX0 * texHeight + texelY0]];
uint32_t fg1 = translation[((const uint8_t*)texPixels)[texelX1 * texHeight + texelY1]];
fg = _mm_setr_epi32(fg0, fg1, 0, 0);
}
else
{
uint32_t fg0 = texPixels[texelX0 * texHeight + texelY0];
uint32_t fg1 = texPixels[texelX1 * texHeight + texelY1];
fg = _mm_setr_epi32(fg0, fg1, 0, 0);
}
}
if (ModeT::SWFlags & SWSTYLEF_Skycap)
{
float rcpW0 = 0x01000000 / posXW;
float rcpW1 = 0x01000000 / (posXW + stepXW);
int32_t v0 = (int32_t)(posXV * rcpW0);
int32_t v1 = (int32_t)((posXV + stepXV) * rcpW1);
int start_fade = 2; // How fast it should fade out
__m128i v = _mm_setr_epi32(v0, v0, v1, v1);
__m128i alpha_top = _mm_min_epi16(_mm_max_epi16(_mm_srai_epi32(v, 16 - start_fade), _mm_setzero_si128()), _mm_set1_epi16(256));
__m128i alpha_bottom = _mm_min_epi16(_mm_max_epi16(_mm_srai_epi32(_mm_sub_epi32(_mm_set1_epi32(2 << 24), v), 16 - start_fade), _mm_setzero_si128()), _mm_set1_epi16(256));
__m128i a = _mm_min_epi16(alpha_top, alpha_bottom);
a = _mm_shufflelo_epi16(_mm_shufflehi_epi16(a, _MM_SHUFFLE(0, 0, 0, 0)), _MM_SHUFFLE(0, 0, 0, 0));
__m128i inv_a = _mm_sub_epi32(_mm_set1_epi32(256), a);
fg = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
__m128i c = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(fg, a), _mm_mullo_epi16(mcapcolor, inv_a)), _mm_set1_epi16(127)), 8);
_mm_storel_epi64((__m128i*)(destLine + x), _mm_packus_epi16(c, c));
}
else
{
if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill))
{
__m128i rgbmask = _mm_set1_epi32(0x00ffffff);
if (ModeT::Flags & STYLEF_RedIsAlpha)
fg = _mm_or_si128(_mm_andnot_si128(rgbmask, _mm_slli_epi32(fg, 8)), _mm_and_si128(rgbmask, mfillcolor));
else
fg = _mm_or_si128(_mm_andnot_si128(rgbmask, fg), _mm_and_si128(rgbmask, mfillcolor));
}
if (!(ModeT::Flags & STYLEF_Alpha1))
{
__m128i a = _mm_srli_epi32(fg, 24);
a = _mm_srli_epi32(_mm_mullo_epi16(a, malpha), 8);
fg = _mm_or_si128(_mm_and_si128(fg, _mm_set1_epi32(0x00ffffff)), _mm_slli_epi32(a, 24));
}
fg = _mm_unpacklo_epi8(fg, _mm_setzero_si128());
fixed_t lightpos0 = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
fixed_t lightpos1 = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * (posXW + stepXW)), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT);
lightpos0 = (lightpos0 & lightmask) | ((light << 8) & ~lightmask);
lightpos1 = (lightpos1 & lightmask) | ((light << 8) & ~lightmask);
int lightshade0 = lightpos0 >> 8;
int lightshade1 = lightpos1 >> 8;
__m128i shadedfg = _mm_srli_epi16(_mm_mullo_epi16(fg, _mm_setr_epi16(lightshade0, lightshade0, lightshade0, 256, lightshade1, lightshade1, lightshade1, 256)), 8);
__m128i out;
if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero)
{
out = shadedfg;
}
else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One)
{
__m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128());
if (ModeT::BlendOp == STYLEOP_Add)
{
out = _mm_add_epi16(dest, shadedfg);
}
else if (ModeT::BlendOp == STYLEOP_RevSub)
{
out = _mm_sub_epi16(dest, shadedfg);
}
else //if (ModeT::BlendOp == STYLEOP_Sub)
{
out = _mm_sub_epi16(shadedfg, dest);
}
}
else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor)
{
__m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128());
__m128i sfactor = _mm_add_epi16(shadedfg, _mm_srli_epi16(shadedfg, 7));
__m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor);
out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(dest, dfactor), _mm_mullo_epi16(shadedfg, sfactor)), _mm_set1_epi16(127)), 8);
}
else
{
__m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128());
__m128i sfactor = _mm_shufflehi_epi16(_mm_shufflelo_epi16(shadedfg, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3));
sfactor = _mm_add_epi16(sfactor, _mm_srli_epi16(sfactor, 7)); // 255 -> 256
__m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor);
__m128i src = _mm_mullo_epi16(shadedfg, sfactor);
if (ModeT::BlendDest == STYLEALPHA_One)
{
dest = _mm_slli_epi16(dest, 8);
}
else
{
__m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor);
dest = _mm_mullo_epi16(dest, dfactor);
}
if (ModeT::BlendOp == STYLEOP_Add)
{
out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(dest, src), _mm_set1_epi16(127)), 8);
}
else if (ModeT::BlendOp == STYLEOP_RevSub)
{
out = _mm_srli_epi16(_mm_add_epi16(_mm_sub_epi16(dest, src), _mm_set1_epi16(127)), 8);
}
else //if (ModeT::BlendOp == STYLEOP_Sub)
{
out = _mm_srli_epi16(_mm_add_epi16(_mm_sub_epi16(src, dest), _mm_set1_epi16(127)), 8);
}
}
_mm_storel_epi64((__m128i*)(destLine + x), _mm_or_si128(_mm_packus_epi16(out, out), _mm_set1_epi32(0xff000000)));
}
posXW += stepXW + stepXW;
posXU += stepXU + stepXU;
posXV += stepXV + stepXV;
x += 2;
}
#endif
while (x < x1) while (x < x1)
{ {
uint32_t fg; uint32_t fg = 0;
if (ModeT::SWFlags & SWSTYLEF_Fill) if (ModeT::SWFlags & SWSTYLEF_Fill)
{ {
@ -1462,7 +1413,7 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{ {
fg = destLine[x]; fg = destLine[x];
} }
else else if (ModeT::BlendOp != STYLEOP_Fuzz)
{ {
float rcpW = 0x01000000 / posXW; float rcpW = 0x01000000 / posXW;
int32_t u = (int32_t)(posXU * rcpW); int32_t u = (int32_t)(posXU * rcpW);
@ -1474,13 +1425,48 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{ {
fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]]; fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]];
} }
else if (ModeT::Flags & STYLEF_RedIsAlpha)
{
fg = ((const uint8_t*)texPixels)[texelX * texHeight + texelY];
}
else else
{ {
fg = texPixels[texelX * texHeight + texelY]; fg = texPixels[texelX * texHeight + texelY];
} }
} }
if (ModeT::SWFlags & SWSTYLEF_Skycap) if (ModeT::BlendOp == STYLEOP_Fuzz)
{
using namespace swrenderer;
float rcpW = 0x01000000 / posXW;
int32_t u = (int32_t)(posXU * rcpW);
int32_t v = (int32_t)(posXV * rcpW);
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]);
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
fixed_t fuzzscale = (200 << FRACBITS) / viewheight;
int scaled_x = (x * fuzzscale) >> FRACBITS;
int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos;
fixed_t fuzzcount = FUZZTABLE << FRACBITS;
fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount;
unsigned int alpha = fuzzoffset[fuzz >> FRACBITS];
sampleshadeout = (sampleshadeout * alpha) >> 5;
uint32_t a = 256 - sampleshadeout;
uint32_t dest = destLine[x];
uint32_t out_r = (RPART(dest) * a) >> 8;
uint32_t out_g = (GPART(dest) * a) >> 8;
uint32_t out_b = (BPART(dest) * a) >> 8;
destLine[x] = MAKEARGB(255, out_r, out_g, out_b);
}
else if (ModeT::SWFlags & SWSTYLEF_Skycap)
{ {
float rcpW = 0x01000000 / posXW; float rcpW = 0x01000000 / posXW;
int32_t v = (int32_t)(posXV * rcpW); int32_t v = (int32_t)(posXV * rcpW);
@ -1516,7 +1502,7 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill))
{ {
if (ModeT::Flags & STYLEF_RedIsAlpha) if (ModeT::Flags & STYLEF_RedIsAlpha)
fg = ((fg << 8) & 0xff000000) | (fillcolor & 0x00ffffff); fg = (fg << 24) | (fillcolor & 0x00ffffff);
else else
fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff); fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff);
} }
@ -1528,12 +1514,105 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
fgalpha = (fgalpha * alpha) >> 8; fgalpha = (fgalpha * alpha) >> 8;
} }
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); int lightshade;
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); if (OptT::Flags & SWOPT_FixedLight)
int lightshade = lightpos >> 8; {
uint32_t shadedfg_r = (RPART(fg) * lightshade) >> 8; lightshade = light;
uint32_t shadedfg_g = (GPART(fg) * lightshade) >> 8; }
uint32_t shadedfg_b = (BPART(fg) * lightshade) >> 8; else
{
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
lightshade = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
}
uint32_t lit_r = 0, lit_g = 0, lit_b = 0;
if (OptT::Flags & SWOPT_DynLights)
{
lit_r = RPART(dynlightcolor);
lit_g = GPART(dynlightcolor);
lit_b = BPART(dynlightcolor);
float rcp_posXW = 1.0f / posXW;
float worldposX = posWorldX * rcp_posXW;
float worldposY = posWorldY * rcp_posXW;
float worldposZ = posWorldZ * rcp_posXW;
for (int i = 0; i < num_lights; i++)
{
float lightposX = lights[i].x;
float lightposY = lights[i].y;
float lightposZ = lights[i].z;
float light_radius = lights[i].radius;
uint32_t light_color = lights[i].color;
bool is_attenuated = light_radius < 0.0f;
if (is_attenuated)
light_radius = -light_radius;
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
float Lx = lightposX - worldposX;
float Ly = lightposY - worldposY;
float Lz = lightposZ - worldposZ;
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
#ifdef NO_SSE
//float rcp_dist = 1.0f / sqrt(dist2);
float rcp_dist = 1.0f / (dist2 * 0.01f);
#else
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
#endif
float dist = dist2 * rcp_dist;
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
// The simple light type
float simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx *= rcp_dist;
Ly *= rcp_dist;
Lz *= rcp_dist;
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
lit_r += (RPART(light_color) * attenuation) >> 8;
lit_g += (GPART(light_color) * attenuation) >> 8;
lit_b += (BPART(light_color) * attenuation) >> 8;
}
}
uint32_t shadedfg_r, shadedfg_g, shadedfg_b;
if (OptT::Flags & SWOPT_ColoredFog)
{
uint32_t fg_r = RPART(fg);
uint32_t fg_g = GPART(fg);
uint32_t fg_b = BPART(fg);
uint32_t intensity = ((fg_r * 77 + fg_g * 143 + fg_b * 37) >> 8) * desaturate;
shadedfg_r = (((shade_fade_r + ((fg_r * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_r) >> 8;
shadedfg_g = (((shade_fade_g + ((fg_g * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_g) >> 8;
shadedfg_b = (((shade_fade_b + ((fg_b * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_b) >> 8;
lit_r = MIN(lit_r, (uint32_t)256);
lit_g = MIN(lit_g, (uint32_t)256);
lit_b = MIN(lit_b, (uint32_t)256);
shadedfg_r = MIN(shadedfg_r + ((fg_r * lit_r) >> 8), (uint32_t)255);
shadedfg_g = MIN(shadedfg_g + ((fg_g * lit_g) >> 8), (uint32_t)255);
shadedfg_b = MIN(shadedfg_b + ((fg_b * lit_b) >> 8), (uint32_t)255);
}
else
{
lit_r = MIN(lightshade + lit_r, (uint32_t)256);
lit_g = MIN(lightshade + lit_g, (uint32_t)256);
lit_b = MIN(lightshade + lit_b, (uint32_t)256);
shadedfg_r = (RPART(fg) * lit_r) >> 8;
shadedfg_g = (GPART(fg) * lit_g) >> 8;
shadedfg_b = (BPART(fg) * lit_b) >> 8;
}
if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero)
{ {
@ -1582,16 +1661,15 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
destLine[x] = MAKEARGB(255, out_r, out_g, out_b); destLine[x] = MAKEARGB(255, out_r, out_g, out_b);
} }
else if (fgalpha == 255) else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255)
{ {
destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b);
} }
else if (fgalpha != 0) else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0)
{ {
uint32_t dest = destLine[x]; uint32_t dest = destLine[x];
uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256
uint32_t dfactor = 256 - sfactor;
uint32_t src_r = shadedfg_r * sfactor; uint32_t src_r = shadedfg_r * sfactor;
uint32_t src_g = shadedfg_g * sfactor; uint32_t src_g = shadedfg_g * sfactor;
uint32_t src_b = shadedfg_b * sfactor; uint32_t src_b = shadedfg_b * sfactor;
@ -1648,49 +1726,139 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
posXW += stepXW; posXW += stepXW;
posXU += stepXU; posXU += stepXU;
posXV += stepXV; posXV += stepXV;
if (OptT::Flags & SWOPT_DynLights)
{
posWorldX += stepWorldX;
posWorldY += stepWorldY;
posWorldZ += stepWorldZ;
}
if (!(OptT::Flags & SWOPT_FixedLight))
lightpos += lightstep;
x++; x++;
} }
} }
template<typename ModeT> template<typename ModeT>
void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{ {
using namespace TriScreenDrawerModes; using namespace TriScreenDrawerModes;
float v1X = args->v1->x; if (args->uniforms->NumLights() == 0)
float v1Y = args->v1->y; {
float v1W = args->v1->w; if (!args->uniforms->FixedLight())
float v1U = args->v1->u * v1W; {
float v1V = args->v1->v * v1W; if (args->uniforms->SimpleShade())
float stepXW = args->gradientX.W; DrawSpanOpt32<ModeT, DrawerOpt>(y, x0, x1, args);
float stepXU = args->gradientX.U; else
float stepXV = args->gradientX.V; DrawSpanOpt32<ModeT, DrawerOptC>(y, x0, x1, args);
float startX = x0 + (0.5f - v1X); }
float startY = y + (0.5f - v1Y); else
float posXW = v1W + stepXW * startX + args->gradientY.W * startY; {
float posXU = v1U + stepXU * startX + args->gradientY.U * startY; if (args->uniforms->SimpleShade())
float posXV = v1V + stepXV * startX + args->gradientY.V * startY; DrawSpanOpt32<ModeT, DrawerOptF>(y, x0, x1, args);
else
DrawSpanOpt32<ModeT, DrawerOptCF>(y, x0, x1, args);
}
}
else
{
if (!args->uniforms->FixedLight())
{
if (args->uniforms->SimpleShade())
DrawSpanOpt32<ModeT, DrawerOptL>(y, x0, x1, args);
else
DrawSpanOpt32<ModeT, DrawerOptLC>(y, x0, x1, args);
}
else
{
if (args->uniforms->SimpleShade())
DrawSpanOpt32<ModeT, DrawerOptLF>(y, x0, x1, args);
else
DrawSpanOpt32<ModeT, DrawerOptLCF>(y, x0, x1, args);
}
}
}
auto colormaps = args->uniforms->BaseColormap(); template<typename ModeT, typename OptT>
void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{
using namespace TriScreenDrawerModes;
const uint8_t *texPixels = args->uniforms->TexturePixels(); float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ;
const uint8_t *translation = args->uniforms->Translation(); float startX, startY;
int texWidth = args->uniforms->TextureWidth(); float stepXW, stepXU, stepXV, stepWorldX, stepWorldY, stepWorldZ;
int texHeight = args->uniforms->TextureHeight(); float posXW, posXU, posXV, posWorldX, posWorldY, posWorldZ;
int fillcolor = args->uniforms->Color(); PolyLight *lights;
int alpha = args->uniforms->SrcAlpha(); int num_lights;
float worldnormalX, worldnormalY, worldnormalZ;
uint32_t dynlightcolor;
const uint8_t *colormaps, *texPixels, *translation;
int texWidth, texHeight;
uint32_t fillcolor, capcolor;
int alpha;
uint32_t light;
fixed_t shade, lightpos, lightstep;
v1X = args->v1->x;
v1Y = args->v1->y;
v1W = args->v1->w;
v1U = args->v1->u * v1W;
v1V = args->v1->v * v1W;
startX = x0 + (0.5f - v1X);
startY = y + (0.5f - v1Y);
stepXW = args->gradientX.W;
stepXU = args->gradientX.U;
stepXV = args->gradientX.V;
posXW = v1W + stepXW * startX + args->gradientY.W * startY;
posXU = v1U + stepXU * startX + args->gradientY.U * startY;
posXV = v1V + stepXV * startX + args->gradientY.V * startY;
texPixels = args->uniforms->TexturePixels();
translation = args->uniforms->Translation();
texWidth = args->uniforms->TextureWidth();
texHeight = args->uniforms->TextureHeight();
fillcolor = args->uniforms->Color();
alpha = args->uniforms->Alpha();
colormaps = args->uniforms->BaseColormap();
light = args->uniforms->Light();
uint32_t capcolor = fillcolor;
if (ModeT::SWFlags & SWSTYLEF_Skycap) if (ModeT::SWFlags & SWSTYLEF_Skycap)
capcolor = GPalette.BaseColors[capcolor].d; capcolor = GPalette.BaseColors[fillcolor].d;
bool is_fixed_light = args->uniforms->FixedLight(); if (OptT::Flags & SWOPT_FixedLight)
uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; {
uint32_t light = args->uniforms->Light();
float shade = 2.0f - (light + 12.0f) / 128.0f;
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
light += light >> 7; // 255 -> 256 light += light >> 7; // 255 -> 256
light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00;
}
else
{
float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f);
shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT);
lightpos = (fixed_t)(globVis * posXW * (float)FRACUNIT);
lightstep = (fixed_t)(globVis * stepXW * (float)FRACUNIT);
}
if (OptT::Flags & SWOPT_DynLights)
{
v1WorldX = args->v1->worldX * v1W;
v1WorldY = args->v1->worldY * v1W;
v1WorldZ = args->v1->worldZ * v1W;
stepWorldX = args->gradientX.WorldX;
stepWorldY = args->gradientX.WorldY;
stepWorldZ = args->gradientX.WorldZ;
posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY;
posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY;
posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY;
lights = args->uniforms->Lights();
num_lights = args->uniforms->NumLights();
worldnormalX = args->uniforms->Normal().X;
worldnormalY = args->uniforms->Normal().Y;
worldnormalZ = args->uniforms->Normal().Z;
dynlightcolor = args->uniforms->DynLightColor();
}
uint8_t *dest = (uint8_t*)args->dest; uint8_t *dest = (uint8_t*)args->dest;
uint8_t *destLine = dest + args->pitch * y; uint8_t *destLine = dest + args->pitch * y;
@ -1698,7 +1866,7 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
int x = x0; int x = x0;
while (x < x1) while (x < x1)
{ {
int fg; int fg = 0;
int fgalpha = 255; int fgalpha = 255;
if (ModeT::SWFlags & SWSTYLEF_Fill) if (ModeT::SWFlags & SWSTYLEF_Fill)
@ -1709,7 +1877,7 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{ {
fg = destLine[x]; fg = destLine[x];
} }
else else if (ModeT::BlendOp != STYLEOP_Fuzz)
{ {
float rcpW = 0x01000000 / posXW; float rcpW = 0x01000000 / posXW;
int32_t u = (int32_t)(posXU * rcpW); int32_t u = (int32_t)(posXU * rcpW);
@ -1724,7 +1892,37 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
fgalpha = (fg != 0) ? 255 : 0; fgalpha = (fg != 0) ? 255 : 0;
} }
if (ModeT::SWFlags & SWSTYLEF_Skycap) if (ModeT::BlendOp == STYLEOP_Fuzz)
{
using namespace swrenderer;
float rcpW = 0x01000000 / posXW;
int32_t u = (int32_t)(posXU * rcpW);
int32_t v = (int32_t)(posXV * rcpW);
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0;
fixed_t fuzzscale = (200 << FRACBITS) / viewheight;
int scaled_x = (x * fuzzscale) >> FRACBITS;
int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos;
fixed_t fuzzcount = FUZZTABLE << FRACBITS;
fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount;
unsigned int alpha = fuzzoffset[fuzz >> FRACBITS];
sampleshadeout = (sampleshadeout * alpha) >> 5;
uint32_t a = 256 - sampleshadeout;
uint32_t dest = GPalette.BaseColors[destLine[x]].d;
uint32_t r = (RPART(dest) * a) >> 8;
uint32_t g = (GPART(dest) * a) >> 8;
uint32_t b = (BPART(dest) * a) >> 8;
destLine[x] = RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
}
else if (ModeT::SWFlags & SWSTYLEF_Skycap)
{ {
float rcpW = 0x01000000 / posXW; float rcpW = 0x01000000 / posXW;
int32_t v = (int32_t)(posXV * rcpW); int32_t v = (int32_t)(posXV * rcpW);
@ -1771,11 +1969,95 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
fgalpha = (fgalpha * alpha) >> 8; fgalpha = (fgalpha * alpha) >> 8;
} }
fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); uint8_t shadedfg;
lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); if (OptT::Flags & SWOPT_FixedLight)
int lightshade = lightpos >> 8; {
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; shadedfg = colormaps[light + fg];
uint8_t shadedfg = colormaps[lightshade + fg]; }
else
{
fixed_t maxvis = 24 * FRACUNIT / 32;
fixed_t maxlight = 31 * FRACUNIT / 32;
int lightshade = (FRACUNIT - clamp<fixed_t>(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8;
lightshade = ((256 - lightshade) << 5) & 0xffffff00;
shadedfg = colormaps[lightshade + fg];
}
if (OptT::Flags & SWOPT_DynLights)
{
uint32_t lit_r = RPART(dynlightcolor);
uint32_t lit_g = GPART(dynlightcolor);
uint32_t lit_b = BPART(dynlightcolor);
#ifdef NO_SSE
float rcp_posXW = 1.0f / posXW;
#else
float rcp_posXW = _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(posXW)));
#endif
float worldposX = posWorldX * rcp_posXW;
float worldposY = posWorldY * rcp_posXW;
float worldposZ = posWorldZ * rcp_posXW;
for (int i = 0; i < num_lights; i++)
{
float lightposX = lights[i].x;
float lightposY = lights[i].y;
float lightposZ = lights[i].z;
float light_radius = lights[i].radius;
uint32_t light_color = lights[i].color;
bool is_attenuated = light_radius < 0.0f;
if (is_attenuated)
light_radius = -light_radius;
// L = light-pos
// dist = sqrt(dot(L, L))
// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
float Lx = lightposX - worldposX;
float Ly = lightposY - worldposY;
float Lz = lightposZ - worldposZ;
float dist2 = Lx * Lx + Ly * Ly + Lz * Lz;
#ifdef NO_SSE
//float rcp_dist = 1.0f / sqrt(dist2);
float rcp_dist = 1.0f / (dist2 * 0.01f);
#else
float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2)));
#endif
float dist = dist2 * rcp_dist;
float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
// The simple light type
float simple_attenuation = distance_attenuation;
// The point light type
// diffuse = max(dot(N,normalize(L)),0) * attenuation
Lx *= rcp_dist;
Ly *= rcp_dist;
Lz *= rcp_dist;
float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz;
float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation;
uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation);
lit_r += (RPART(light_color) * attenuation) >> 8;
lit_g += (GPART(light_color) * attenuation) >> 8;
lit_b += (BPART(light_color) * attenuation) >> 8;
}
if (lit_r || lit_g || lit_b)
{
lit_r = MIN(lit_r, (uint32_t)256);
lit_g = MIN(lit_g, (uint32_t)256);
lit_b = MIN(lit_b, (uint32_t)256);
uint32_t fgrgb = GPalette.BaseColors[fg];
uint32_t shadedfgrgb = GPalette.BaseColors[shadedfg];
uint32_t out_r = MIN(((RPART(fgrgb) * lit_r) >> 8) + RPART(shadedfgrgb), (uint32_t)255);
uint32_t out_g = MIN(((GPART(fgrgb) * lit_g) >> 8) + GPART(shadedfgrgb), (uint32_t)255);
uint32_t out_b = MIN(((BPART(fgrgb) * lit_b) >> 8) + BPART(shadedfgrgb), (uint32_t)255);
shadedfg = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
}
}
if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero)
{ {
@ -1826,11 +2108,11 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)];
} }
else if (fgalpha == 255) else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255)
{ {
destLine[x] = shadedfg; destLine[x] = shadedfg;
} }
else if (fgalpha != 0) else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0)
{ {
uint32_t src = GPalette.BaseColors[shadedfg]; uint32_t src = GPalette.BaseColors[shadedfg];
uint32_t dest = GPalette.BaseColors[destLine[x]]; uint32_t dest = GPalette.BaseColors[destLine[x]];
@ -1893,10 +2175,39 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
posXW += stepXW; posXW += stepXW;
posXU += stepXU; posXU += stepXU;
posXV += stepXV; posXV += stepXV;
if (OptT::Flags & SWOPT_DynLights)
{
posWorldX += stepWorldX;
posWorldY += stepWorldY;
posWorldZ += stepWorldZ;
}
if (!(OptT::Flags & SWOPT_FixedLight))
lightpos += lightstep;
x++; x++;
} }
} }
// Span drawer entry point for the 8-bit output path.
//
// Inspects the triangle uniforms once per span and forwards to the
// DrawSpanOpt8 instantiation whose compile time option flags match:
//   - dynamic lights present (NumLights() != 0) selects an "L" variant
//   - fixed light (FixedLight())                selects an "F" variant
// Every variant chosen here also carries the ColoredFog ("C") flag.
//
// y      - scanline being drawn
// x0, x1 - span range on that scanline; x1 is exclusive
// args   - triangle setup data and uniforms, forwarded unchanged
template<typename ModeT>
void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args)
{
	using namespace TriScreenDrawerModes;

	const bool hasDynLights = !(args->uniforms->NumLights() == 0);
	const bool hasFixedLight = args->uniforms->FixedLight();

	// Bit 1: dynamic lights, bit 0: fixed light.
	const int variant = (hasDynLights ? 2 : 0) | (hasFixedLight ? 1 : 0);
	switch (variant)
	{
	case 0:
		DrawSpanOpt8<ModeT, DrawerOptC>(y, x0, x1, args);
		break;
	case 1:
		DrawSpanOpt8<ModeT, DrawerOptCF>(y, x0, x1, args);
		break;
	case 2:
		DrawSpanOpt8<ModeT, DrawerOptLC>(y, x0, x1, args);
		break;
	default:
		DrawSpanOpt8<ModeT, DrawerOptLCF>(y, x0, x1, args);
		break;
	}
}
void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) = void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) =
{ {
&DrawSpan8<TriScreenDrawerModes::StyleOpaque>, &DrawSpan8<TriScreenDrawerModes::StyleOpaque>,

View file

@ -251,6 +251,22 @@ namespace TriScreenDrawerModes
struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; }; struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; };
struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; }; struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; };
// Option bits that specialize the span drawers at compile time. The drawer
// templates test these against OptT::Flags, so each combination below is a
// separate instantiation of the per-pixel loop.
enum SWOptFlags
{
	SWOPT_DynLights  = 1 << 0, // add the dynamic light contribution per pixel
	SWOPT_ColoredFog = 1 << 1, // shade through the fade/desaturate path
	SWOPT_FixedLight = 1 << 2  // use the uniform light value instead of the interpolated one
};

// Tag types carrying one flag combination each.
// Suffix letters: L = DynLights, C = ColoredFog, F = FixedLight.
struct DrawerOpt    { static const int Flags = 0; };
struct DrawerOptF   { static const int Flags = SWOPT_FixedLight; };
struct DrawerOptC   { static const int Flags = SWOPT_ColoredFog; };
struct DrawerOptCF  { static const int Flags = SWOPT_ColoredFog | SWOPT_FixedLight; };
struct DrawerOptL   { static const int Flags = SWOPT_DynLights; };
struct DrawerOptLC  { static const int Flags = SWOPT_DynLights | SWOPT_ColoredFog; };
struct DrawerOptLF  { static const int Flags = SWOPT_DynLights | SWOPT_FixedLight; };
struct DrawerOptLCF { static const int Flags = SWOPT_DynLights | SWOPT_ColoredFog | SWOPT_FixedLight; };
static const int fuzzcolormap[FUZZTABLE] = static const int fuzzcolormap[FUZZTABLE] =
{ {
6, 11, 6, 11, 6, 6, 11, 6, 6, 11, 6, 11, 6, 11, 6, 6, 11, 6, 6, 11,

View file

@ -56,12 +56,16 @@ void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, co
ModelActor = actor; ModelActor = actor;
const_cast<VSMatrix &>(objectToWorldMatrix).copy(ObjectToWorld.Matrix); const_cast<VSMatrix &>(objectToWorldMatrix).copy(ObjectToWorld.Matrix);
SetTransform(); SetTransform();
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES))
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true);
} }
void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf)
{ {
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES))
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false);
ModelActor = nullptr; ModelActor = nullptr;
} }
@ -100,6 +104,8 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT
const_cast<VSMatrix &>(objectToWorldMatrix).copy(ObjectToWorld.Matrix); const_cast<VSMatrix &>(objectToWorldMatrix).copy(ObjectToWorld.Matrix);
SetTransform(); SetTransform();
PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true);
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true);
} }
@ -107,6 +113,8 @@ void PolyModelRenderer::EndDrawHUDModel(AActor *actor)
{ {
ModelActor = nullptr; ModelActor = nullptr;
PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false);
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false);
} }
@ -146,8 +154,7 @@ void PolyModelRenderer::DrawArrays(int start, int count)
args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite); args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite);
args.SetStencilTestValue(StencilValue); args.SetStencilTestValue(StencilValue);
args.SetClipPlane(0, PolyClipPlane()); args.SetClipPlane(0, PolyClipPlane());
args.SetStyle(TriBlendMode::Opaque); args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite);
args.SetTexture(SkinTexture, DefaultRenderStyle());
args.SetDepthTest(true); args.SetDepthTest(true);
args.SetWriteDepth(true); args.SetWriteDepth(true);
args.SetWriteStencil(false); args.SetWriteStencil(false);
@ -169,8 +176,7 @@ void PolyModelRenderer::DrawElements(int numIndices, size_t offset)
args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite); args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite);
args.SetStencilTestValue(StencilValue); args.SetStencilTestValue(StencilValue);
args.SetClipPlane(0, PolyClipPlane()); args.SetClipPlane(0, PolyClipPlane());
args.SetStyle(TriBlendMode::Opaque); args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite);
args.SetTexture(SkinTexture, DefaultRenderStyle());
args.SetDepthTest(true); args.SetDepthTest(true);
args.SetWriteDepth(true); args.SetWriteDepth(true);
args.SetWriteStencil(false); args.SetWriteStencil(false);

View file

@ -354,7 +354,14 @@ void RenderPolyWall::Render(PolyRenderThread *thread)
} }
else else
{ {
args.SetStyle(Additive ? TriBlendMode::Add : TriBlendMode::Normal, MIN(Alpha, 1.0)); double a = MIN(Alpha, 1.0);
if (Additive)
args.SetStyle(TriBlendMode::Add, a);
else if (a < 1.0)
args.SetStyle(TriBlendMode::Translucent, a);
else
args.SetStyle(TriBlendMode::Normal);
args.SetStencilTestValue(StencilValue + 1); args.SetStencilTestValue(StencilValue + 1);
args.SetDepthTest(true); args.SetDepthTest(true);
args.SetWriteDepth(true); args.SetWriteDepth(true);

View file

@ -120,13 +120,17 @@ namespace swrenderer
} }
SetTransform(); SetTransform();
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES))
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true);
} }
void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf)
{ {
ModelActor = nullptr; if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES))
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false);
ModelActor = nullptr;
} }
IModelVertexBuffer *SWModelRenderer::CreateVertexBuffer(bool needindex, bool singleframe) IModelVertexBuffer *SWModelRenderer::CreateVertexBuffer(bool needindex, bool singleframe)
@ -185,6 +189,8 @@ namespace swrenderer
ClipBottom = {}; ClipBottom = {};
SetTransform(); SetTransform();
PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true);
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true);
} }
@ -192,6 +198,8 @@ namespace swrenderer
{ {
ModelActor = nullptr; ModelActor = nullptr;
PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false);
if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal])
PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false);
} }
@ -229,13 +237,7 @@ namespace swrenderer
PolyDrawArgs args; PolyDrawArgs args;
args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite); args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite);
args.SetStyle(TriBlendMode::Opaque); args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite);
if (Thread->Viewport->RenderTarget->IsBgra())
args.SetTexture((const uint8_t *)SkinTexture->GetPixelsBgra(), SkinTexture->GetWidth(), SkinTexture->GetHeight());
else
args.SetTexture(SkinTexture->GetPixels(DefaultRenderStyle()), SkinTexture->GetWidth(), SkinTexture->GetHeight());
args.SetDepthTest(true); args.SetDepthTest(true);
args.SetWriteDepth(true); args.SetWriteDepth(true);
args.SetWriteStencil(false); args.SetWriteStencil(false);
@ -259,13 +261,7 @@ namespace swrenderer
PolyDrawArgs args; PolyDrawArgs args;
args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite); args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite);
args.SetStyle(TriBlendMode::Opaque); args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite);
if (Thread->Viewport->RenderTarget->IsBgra())
args.SetTexture((const uint8_t *)SkinTexture->GetPixelsBgra(), SkinTexture->GetWidth(), SkinTexture->GetHeight());
else
args.SetTexture(SkinTexture->GetPixels(DefaultRenderStyle()), SkinTexture->GetWidth(), SkinTexture->GetHeight());
args.SetDepthTest(true); args.SetDepthTest(true);
args.SetWriteDepth(true); args.SetWriteDepth(true);
args.SetWriteStencil(false); args.SetWriteStencil(false);