From 60929f353f427c6b63c9fc2a4bc9fa2c0cad6fd6 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 29 May 2018 23:32:57 +0200 Subject: [PATCH] - replace softpoly block drawers with span drawers and make them use blending rules directly from render styles - add dynlights to softpoly pal mode (cherry picked from commit 4e6226fc2d98f0ca0a641f15141313c3afc85320) --- src/polyrenderer/drawers/poly_draw_args.cpp | 12 +- src/polyrenderer/drawers/poly_draw_args.h | 7 +- src/polyrenderer/drawers/poly_drawer32.h | 447 ---------- src/polyrenderer/drawers/poly_drawer32_sse2.h | 455 ---------- src/polyrenderer/drawers/poly_drawer8.h | 200 ----- src/polyrenderer/drawers/poly_triangle.cpp | 8 + src/polyrenderer/drawers/poly_triangle.h | 2 + src/polyrenderer/drawers/screen_triangle.cpp | 799 ++++++++++++------ src/polyrenderer/drawers/screen_triangle.h | 16 + src/polyrenderer/scene/poly_model.cpp | 22 +- src/polyrenderer/scene/poly_wall.cpp | 9 +- src/swrenderer/things/r_model.cpp | 32 +- 12 files changed, 626 insertions(+), 1383 deletions(-) diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index 8cbef26a7..8aa79f70b 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -52,7 +52,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, FRenderStyle style) mTexture = texture; mTextureWidth = texture->GetWidth(); mTextureHeight = texture->GetHeight(); - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) mTexturePixels = (const uint8_t *)texture->GetPixelsBgra(); else mTexturePixels = texture->GetPixels(style); @@ -67,7 +67,7 @@ void PolyDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRender FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && !table->Inactive) { - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) mTranslation = (uint8_t*)table->Palette; else mTranslation = table->Remap; @@ -121,7 +121,7 @@ void PolyDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel, dou void PolyDrawArgs::SetColor(uint32_t bgra, uint8_t palindex) { - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) { mColor = bgra; } @@ -217,7 +217,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, FRenderStyle style) mTexture = texture; mTextureWidth = texture->GetWidth(); mTextureHeight = texture->GetHeight(); - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) mTexturePixels = (const uint8_t *)texture->GetPixelsBgra(); else mTexturePixels = texture->GetPixels(style); @@ -231,7 +231,7 @@ void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, FRender FRemapTable *table = TranslationToTable(translationID); if (table != nullptr && !table->Inactive) { - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) mTranslation = (uint8_t*)table->Palette; else mTranslation = table->Remap; @@ -275,7 +275,7 @@ void RectDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel) void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex) { - if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + if (PolyTriangleDrawer::IsBgra()) { mColor = bgra; } diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index a7b836fd7..88f174525 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -75,7 +75,7 @@ public: void SetWriteColor(bool enable) { mWriteColor = enable; } void SetWriteStencil(bool enable, uint8_t stencilWriteValue = 0) { mWriteStencil = enable; mStencilWriteValue = stencilWriteValue; } void SetWriteDepth(bool enable) { mWriteDepth = enable; } - void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(alpha * 256.0 + 0.5); } + void SetStyle(TriBlendMode blendmode, double alpha = 1.0) { mBlendMode = blendmode; mAlpha = (uint32_t)(alpha * 256.0 + 0.5); } void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); void SetColor(uint32_t bgra, uint8_t palindex); void SetLights(PolyLight *lights, int numLights) { mLights = lights; mNumLights = numLights; } @@ -107,8 +107,7 @@ public: TriBlendMode BlendMode() const { return mBlendMode; } uint32_t Color() const { return mColor; } - uint32_t SrcAlpha() const { return mSrcAlpha; } - uint32_t DestAlpha() const { return 256 - mSrcAlpha; } + uint32_t Alpha() const { return mAlpha; } float GlobVis() const { return mGlobVis; } uint32_t Light() const { return mLight; } @@ -155,7 +154,7 @@ private: TriBlendMode mBlendMode = TriBlendMode::Fill; uint32_t mLight = 0; uint32_t mColor = 0; - uint32_t mSrcAlpha = 0; + uint32_t mAlpha = 0; uint16_t mLightAlpha = 0; uint16_t mLightRed = 0; uint16_t mLightGreen = 0; diff --git a/src/polyrenderer/drawers/poly_drawer32.h b/src/polyrenderer/drawers/poly_drawer32.h index 579330ad1..721fa7cfd 100644 --- a/src/polyrenderer/drawers/poly_drawer32.h +++ b/src/polyrenderer/drawers/poly_drawer32.h @@ -327,453 +327,6 @@ namespace TriScreenDrawerModes } } -template -class TriScreenDrawer32 -{ -public: - static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args) - { - using namespace TriScreenDrawerModes; - - bool is_simple_shade = args->uniforms->SimpleShade(); - - if (SamplerT::Mode == (int)Samplers::Texture) - { - bool is_nearest_filter = args->uniforms->NearestFilter(); - - if (is_simple_shade) - { - if (is_nearest_filter) - DrawBlock(x, y, mask0, mask1, args); - else - DrawBlock(x, y, mask0, mask1, args); - } - else - { - if (is_nearest_filter) - DrawBlock(x, y, mask0, mask1, args); - else - DrawBlock(x, y, mask0, mask1, args); - } - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - DrawBlock(x, y, mask0, mask1, args); - } - else // no linear filtering for translated, shaded, stencil, fill or skycap - { - if (is_simple_shade) - { - DrawBlock(x, y, mask0, mask1, args); - } - else - { - DrawBlock(x, y, mask0, mask1, args); - } - } - } - -private: - template - FORCEINLINE static void DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args) - { - using namespace TriScreenDrawerModes; - - bool is_fixed_light = args->uniforms->FixedLight(); - uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - uint32_t srcalpha = args->uniforms->SrcAlpha(); - uint32_t destalpha = args->uniforms->DestAlpha(); - - auto lights = args->uniforms->Lights(); - auto num_lights = args->uniforms->NumLights(); - FVector3 worldnormal = args->uniforms->Normal(); - uint32_t dynlightcolor = args->uniforms->DynLightColor(); - - // Calculate gradients - const ShadedTriVertex &v1 = *args->v1; - ScreenTriangleStepVariables gradientX = args->gradientX; - ScreenTriangleStepVariables gradientY = args->gradientY; - ScreenTriangleStepVariables blockPosY; - blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y); - blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y); - blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y); - blockPosY.WorldX = v1.worldX * v1.w + gradientX.WorldX * (destX - v1.x) + gradientY.WorldX * (destY - v1.y); - blockPosY.WorldY = v1.worldY * v1.w + gradientX.WorldY * (destX - v1.x) + gradientY.WorldY * (destY - v1.y); - blockPosY.WorldZ = v1.worldZ * v1.w + gradientX.WorldZ * (destX - v1.x) + gradientY.WorldZ * (destY - v1.y); - gradientX.W *= 8.0f; - gradientX.U *= 8.0f; - gradientX.V *= 8.0f; - gradientX.WorldX *= 8.0f; - gradientX.WorldY *= 8.0f; - gradientX.WorldZ *= 8.0f; - - // Output - uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; - int pitch = args->pitch; - uint32_t *dest = destOrg + destX + destY * pitch; - - // Light - uint32_t light = args->uniforms->Light(); - float shade = 2.0f - (light + 12.0f) / 128.0f; - float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); - light += (light >> 7); // 255 -> 256 - - // Sampling stuff - uint32_t color = args->uniforms->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->uniforms->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->uniforms->TexturePixels(); - uint32_t texWidth = args->uniforms->TextureWidth(); - uint32_t texHeight = args->uniforms->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Shade constants - int inv_desaturate; - BgraColor shade_fade, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - shade_fade.r = args->uniforms->ShadeFadeRed(); - shade_fade.g = args->uniforms->ShadeFadeGreen(); - shade_fade.b = args->uniforms->ShadeFadeBlue(); - shade_light.r = args->uniforms->ShadeLightRed(); - shade_light.g = args->uniforms->ShadeLightGreen(); - shade_light.b = args->uniforms->ShadeLightBlue(); - desaturate = args->uniforms->ShadeDesaturate(); - inv_desaturate = 256 - desaturate; - } - else - { - inv_desaturate = 0; - shade_fade.r = 0; - shade_fade.g = 0; - shade_fade.b = 0; - shade_light.r = 0; - shade_light.g = 0; - shade_light.b = 0; - desaturate = 0; - } - - if (mask0 == 0xffffffff && mask1 == 0xffffffff) - { - for (int y = 0; y < 8; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W; - BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W; - BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - BgraColor dynlightstep; - dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3; - dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3; - dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3; - - for (int ix = 0; ix < 8; ix++) - { - // Load bgcolor - BgraColor bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = dest[ix]; - else - bgcolor = 0; - - // Sample fgcolor - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix]; - unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + ix, destY + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - BgraColor mlight; - mlight.r = lightpos0; - mlight.g = lightpos0; - mlight.b = lightpos0; - - BgraColor shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t inv_light = 256 - lightpos0; - shade_fade_lit.r = shade_fade.r * inv_light; - shade_fade_lit.g = shade_fade.g * inv_light; - shade_fade_lit.b = shade_fade.b * inv_light; - } - else - { - shade_fade_lit.r = 0; - shade_fade_lit.g = 0; - shade_fade_lit.b = 0; - } - - // Shade and blend - BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); - - // Store result - dest[ix] = outcolor; - - dynlight.r = MAX(dynlight.r + dynlightstep.r, 0); - dynlight.g = MAX(dynlight.g + dynlightstep.g, 0); - dynlight.b = MAX(dynlight.b + dynlightstep.b, 0); - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - } - else - { - // mask0 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W; - BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W; - BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - BgraColor dynlightstep; - dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3; - dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3; - dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3; - - for (int x = 0; x < 8; x++) - { - // Load bgcolor - BgraColor bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - { - if (mask0 & (1 << 31)) bgcolor = dest[x]; - } - else - bgcolor = 0; - - // Sample fgcolor - if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask0 & (1 << 31))) color = dest[x]; - unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - BgraColor mlight; - mlight.r = lightpos0; - mlight.g = lightpos0; - mlight.b = lightpos0; - - BgraColor shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t inv_light = 256 - lightpos0; - shade_fade_lit.r = shade_fade.r * inv_light; - shade_fade_lit.g = shade_fade.g * inv_light; - shade_fade_lit.b = shade_fade.b * inv_light; - } - else - { - shade_fade_lit.r = 0; - shade_fade_lit.g = 0; - shade_fade_lit.b = 0; - } - - // Shade and blend - BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); - - // Store result - if (mask0 & (1 << 31)) dest[x] = outcolor; - - mask0 <<= 1; - - dynlight.r = MAX(dynlight.r + dynlightstep.r, 0); - dynlight.g = MAX(dynlight.g + dynlightstep.g, 0); - dynlight.b = MAX(dynlight.b + dynlightstep.b, 0); - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - - // mask1 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - FVector3 worldpos = FVector3(blockPosY.WorldX, blockPosY.WorldY, blockPosY.WorldZ) / blockPosY.W; - BgraColor dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - worldpos = FVector3(blockPosX.WorldX, blockPosX.WorldY, blockPosX.WorldZ) / blockPosX.W; - BgraColor dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - BgraColor dynlightstep; - dynlightstep.r = int32_t(dynlightnext.r - dynlight.r) >> 3; - dynlightstep.g = int32_t(dynlightnext.g - dynlight.g) >> 3; - dynlightstep.b = int32_t(dynlightnext.b - dynlight.b) >> 3; - - for (int x = 0; x < 8; x++) - { - // Load bgcolor - BgraColor bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - { - if (mask1 & (1 << 31)) bgcolor = dest[x]; - } - else - bgcolor = 0; - - // Sample fgcolor - if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 31))) color = dest[x]; - unsigned int ifgcolor = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - unsigned int ifgshade = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + 4 + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - BgraColor mlight; - mlight.r = lightpos0; - mlight.g = lightpos0; - mlight.b = lightpos0; - - BgraColor shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - uint32_t inv_light = 256 - lightpos0; - shade_fade_lit.r = shade_fade.r * inv_light; - shade_fade_lit.g = shade_fade.g * inv_light; - shade_fade_lit.b = shade_fade.b * inv_light; - } - else - { - shade_fade_lit.r = 0; - shade_fade_lit.g = 0; - shade_fade_lit.b = 0; - } - - // Shade and blend - BgraColor fgcolor = Shade32(ifgcolor, mlight, desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - BgraColor outcolor = Blend32(fgcolor, bgcolor, ifgcolor, ifgshade, srcalpha, destalpha); - - // Store result - if (mask1 & (1 << 31)) dest[x] = outcolor; - - mask1 <<= 1; - - dynlight.r = MAX(dynlight.r + dynlightstep.r, 0); - dynlight.g = MAX(dynlight.g + dynlightstep.g, 0); - dynlight.b = MAX(dynlight.b + dynlightstep.b, 0); - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - } - } -}; - template class RectScreenDrawer32 { diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h index 33af938dc..a99e3f1e7 100644 --- a/src/polyrenderer/drawers/poly_drawer32_sse2.h +++ b/src/polyrenderer/drawers/poly_drawer32_sse2.h @@ -346,461 +346,6 @@ namespace TriScreenDrawerModes } } -template -class TriScreenDrawer32 -{ -public: - static void Execute(int x, int y, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args) - { - using namespace TriScreenDrawerModes; - - bool is_simple_shade = args->uniforms->SimpleShade(); - - if (SamplerT::Mode == (int)Samplers::Texture) - { - bool is_nearest_filter = args->uniforms->NearestFilter(); - - if (is_simple_shade) - { - if (is_nearest_filter) - DrawBlock(x, y, mask0, mask1, args); - else - DrawBlock(x, y, mask0, mask1, args); - } - else - { - if (is_nearest_filter) - DrawBlock(x, y, mask0, mask1, args); - else - DrawBlock(x, y, mask0, mask1, args); - } - } - else if (SamplerT::Mode == (int)Samplers::Fuzz) - { - DrawBlock(x, y, mask0, mask1, args); - } - else // no linear filtering for translated, shaded, stencil, fill or skycap - { - if (is_simple_shade) - { - DrawBlock(x, y, mask0, mask1, args); - } - else - { - DrawBlock(x, y, mask0, mask1, args); - } - } - } - -private: - template - FORCEINLINE static void VECTORCALL DrawBlock(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args) - { - using namespace TriScreenDrawerModes; - - bool is_fixed_light = args->uniforms->FixedLight(); - uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - uint32_t srcalpha = args->uniforms->SrcAlpha(); - uint32_t destalpha = args->uniforms->DestAlpha(); - - auto lights = args->uniforms->Lights(); - auto num_lights = args->uniforms->NumLights(); - __m128 worldnormal = _mm_setr_ps(args->uniforms->Normal().X, args->uniforms->Normal().Y, args->uniforms->Normal().Z, 0.0f); - uint32_t dynlightcolor = args->uniforms->DynLightColor(); - - // Calculate gradients - const ShadedTriVertex &v1 = *args->v1; - ScreenTriangleStepVariables gradientX = args->gradientX; - ScreenTriangleStepVariables gradientY = args->gradientY; - ScreenTriangleStepVariables blockPosY; - blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y); - blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y); - blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y); - blockPosY.WorldX = v1.worldX * v1.w + gradientX.WorldX * (destX - v1.x) + gradientY.WorldX * (destY - v1.y); - blockPosY.WorldY = v1.worldY * v1.w + gradientX.WorldY * (destX - v1.x) + gradientY.WorldY * (destY - v1.y); - blockPosY.WorldZ = v1.worldZ * v1.w + gradientX.WorldZ * (destX - v1.x) + gradientY.WorldZ * (destY - v1.y); - gradientX.W *= 8.0f; - gradientX.U *= 8.0f; - gradientX.V *= 8.0f; - gradientX.WorldX *= 8.0f; - gradientX.WorldY *= 8.0f; - gradientX.WorldZ *= 8.0f; - - // Output - uint32_t * RESTRICT destOrg = (uint32_t*)args->dest; - int pitch = args->pitch; - uint32_t *dest = destOrg + destX + destY * pitch; - - // Light - uint32_t light = args->uniforms->Light(); - float shade = 2.0f - (light + 12.0f) / 128.0f; - float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); - light += (light >> 7); // 255 -> 256 - - // Sampling stuff - uint32_t color = args->uniforms->Color(); - const uint32_t * RESTRICT translation = (const uint32_t *)args->uniforms->Translation(); - const uint32_t * RESTRICT texPixels = (const uint32_t *)args->uniforms->TexturePixels(); - uint32_t texWidth = args->uniforms->TextureWidth(); - uint32_t texHeight = args->uniforms->TextureHeight(); - uint32_t oneU, oneV; - if (SamplerT::Mode != (int)Samplers::Fill) - { - oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; - oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; - } - else - { - oneU = 0; - oneV = 0; - } - - // Shade constants - __m128i inv_desaturate, shade_fade, shade_light; - int desaturate; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - inv_desaturate = _mm_setr_epi16(256, 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256, 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate(), 256 - args->uniforms->ShadeDesaturate()); - shade_fade = _mm_set_epi16(args->uniforms->ShadeFadeAlpha(), args->uniforms->ShadeFadeRed(), args->uniforms->ShadeFadeGreen(), args->uniforms->ShadeFadeBlue(), args->uniforms->ShadeFadeAlpha(), args->uniforms->ShadeFadeRed(), args->uniforms->ShadeFadeGreen(), args->uniforms->ShadeFadeBlue()); - shade_light = _mm_set_epi16(args->uniforms->ShadeLightAlpha(), args->uniforms->ShadeLightRed(), args->uniforms->ShadeLightGreen(), args->uniforms->ShadeLightBlue(), args->uniforms->ShadeLightAlpha(), args->uniforms->ShadeLightRed(), args->uniforms->ShadeLightGreen(), args->uniforms->ShadeLightBlue()); - desaturate = args->uniforms->ShadeDesaturate(); - } - else - { - inv_desaturate = _mm_setzero_si128(); - shade_fade = _mm_setzero_si128(); - shade_fade = _mm_setzero_si128(); - shade_light = _mm_setzero_si128(); - desaturate = 0; - } - - if (mask0 == 0xffffffff && mask1 == 0xffffffff) - { - for (int y = 0; y < 8; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - __m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W); - __m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW); - __m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - mrcpW = _mm_set1_ps(1.0f / blockPosX.W); - worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW); - __m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - __m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3); - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff,0xffff,0,0))), _mm_set1_epi16(256)), _mm_setzero_si128()); - dynlightstep = _mm_slli_epi16(dynlightstep, 1); - - for (int ix = 0; ix < 4; ix++) - { - // Load bgcolor - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(dest + ix * 2)), _mm_setzero_si128()); - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix * 2]; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + ix * 2, destY + y); - posU += stepU; - posV += stepV; - - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[ix * 2 + 1]; - ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + ix * 2 + 1, destY + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - int lightpos1 = lightpos >> 8; - lightpos += lightstep; - __m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0); - - __m128i shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); - shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); - } - else - { - shade_fade_lit = _mm_setzero_si128(); - } - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - _mm_storel_epi64((__m128i*)(dest + ix * 2), outcolor); - - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128()); - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - } - else - { - // mask0 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - __m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W); - __m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW); - __m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - mrcpW = _mm_set1_ps(1.0f / blockPosX.W); - worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW); - __m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - __m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3); - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128()); - dynlightstep = _mm_slli_epi16(dynlightstep, 1); - - for (int x = 0; x < 4; x++) - { - // Load bgcolor - uint32_t desttmp[2]; - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - { - if (mask0 & (1 << 31)) desttmp[0] = dest[x * 2]; - if (mask0 & (1 << 30)) desttmp[1] = dest[x * 2 + 1]; - bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - } - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[x * 2]; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x * 2, destY + y); - posU += stepU; - posV += stepV; - - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = dest[x * 2 + 1]; - ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x * 2 + 1, destY + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - int lightpos1 = lightpos >> 8; - lightpos += lightstep; - __m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0); - - __m128i shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); - shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); - } - else - { - shade_fade_lit = _mm_setzero_si128(); - } - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - _mm_storel_epi64((__m128i*)desttmp, outcolor); - if (mask0 & (1 << 31)) dest[x * 2] = desttmp[0]; - if (mask0 & (1 << 30)) dest[x * 2 + 1] = desttmp[1]; - - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128()); - - mask0 <<= 2; - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - - // mask1 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - __m128 mrcpW = _mm_set1_ps(1.0f / blockPosY.W); - __m128 worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosY.WorldX), mrcpW); - __m128i dynlight = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - blockPosX.WorldX += gradientX.WorldX; - blockPosX.WorldY += gradientX.WorldY; - blockPosX.WorldZ += gradientX.WorldZ; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - mrcpW = _mm_set1_ps(1.0f / blockPosX.W); - worldpos = _mm_mul_ps(_mm_loadu_ps(&blockPosX.WorldX), mrcpW); - __m128i dynlightnext = CalcDynamicLight(lights, num_lights, worldpos, worldnormal, dynlightcolor); - __m128i dynlightstep = _mm_srai_epi16(_mm_sub_epi16(dynlightnext, dynlight), 3); - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, _mm_and_si128(dynlightstep, _mm_set_epi32(0xffff, 0xffff, 0, 0))), _mm_set1_epi16(256)), _mm_setzero_si128()); - dynlightstep = _mm_slli_epi16(dynlightstep, 1); - - for (int x = 0; x < 4; x++) - { - // Load bgcolor - uint32_t desttmp[2]; - __m128i bgcolor; - if (BlendT::Mode != (int)BlendModes::Opaque) - { - if (mask1 & (1 << 31)) desttmp[0] = dest[x * 2]; - if (mask1 & (1 << 30)) desttmp[1] = dest[x * 2 + 1]; - bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); - } - else - bgcolor = _mm_setzero_si128(); - - // Sample fgcolor - unsigned int ifgcolor[2], ifgshade[2]; - if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 31))) color = dest[x * 2]; - ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x * 2, destY + 4 + y); - posU += stepU; - posV += stepV; - - if (SamplerT::Mode == (int)Samplers::FogBoundary && (mask1 & (1 << 30))) color = dest[x * 2 + 1]; - ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight, destX + x * 2 + 1, destY + 4 + y); - posU += stepU; - posV += stepV; - - // Setup light - int lightpos0 = lightpos >> 8; - lightpos += lightstep; - int lightpos1 = lightpos >> 8; - lightpos += lightstep; - __m128i mlight = _mm_set_epi16(256, lightpos1, lightpos1, lightpos1, 256, lightpos0, lightpos0, lightpos0); - - __m128i shade_fade_lit; - if (ShadeModeT::Mode == (int)ShadeMode::Advanced) - { - __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); - shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); - } - else - { - shade_fade_lit = _mm_setzero_si128(); - } - - // Shade and blend - __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light, dynlight); - __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); - - // Store result - _mm_storel_epi64((__m128i*)desttmp, outcolor); - if (mask1 & (1 << 31)) dest[x * 2] = desttmp[0]; - if (mask1 & (1 << 30)) dest[x * 2 + 1] = desttmp[1]; - - dynlight = _mm_max_epi16(_mm_min_epi16(_mm_add_epi16(dynlight, dynlightstep), _mm_set1_epi16(256)), _mm_setzero_si128()); - - mask1 <<= 2; - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - blockPosY.WorldX += gradientY.WorldX; - blockPosY.WorldY += gradientY.WorldY; - blockPosY.WorldZ += gradientY.WorldZ; - - dest += pitch; - } - } - } -}; - template class RectScreenDrawer32 { diff --git a/src/polyrenderer/drawers/poly_drawer8.h b/src/polyrenderer/drawers/poly_drawer8.h index d87b16194..1db272885 100644 --- a/src/polyrenderer/drawers/poly_drawer8.h +++ b/src/polyrenderer/drawers/poly_drawer8.h @@ -225,206 +225,6 @@ namespace TriScreenDrawerModes } } -template -class TriScreenDrawer8 -{ -public: - static void Execute(int destX, int destY, uint32_t mask0, uint32_t mask1, const TriDrawTriangleArgs *args) - { - using namespace TriScreenDrawerModes; - - bool is_fixed_light = args->uniforms->FixedLight(); - uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - auto colormaps = args->uniforms->BaseColormap(); - uint32_t srcalpha = args->uniforms->SrcAlpha(); - uint32_t destalpha = args->uniforms->DestAlpha(); - - // Calculate gradients - const ShadedTriVertex &v1 = *args->v1; - ScreenTriangleStepVariables gradientX = args->gradientX; - ScreenTriangleStepVariables gradientY = args->gradientY; - ScreenTriangleStepVariables blockPosY; - blockPosY.W = v1.w + gradientX.W * (destX - v1.x) + gradientY.W * (destY - v1.y); - blockPosY.U = v1.u * v1.w + gradientX.U * (destX - v1.x) + gradientY.U * (destY - v1.y); - blockPosY.V = v1.v * v1.w + gradientX.V * (destX - v1.x) + gradientY.V * (destY - v1.y); - gradientX.W *= 8.0f; - gradientX.U *= 8.0f; - gradientX.V *= 8.0f; - - // Output - uint8_t * RESTRICT destOrg = args->dest; - int pitch = args->pitch; - uint8_t *dest = destOrg + destX + destY * pitch; - - // Light - uint32_t light = args->uniforms->Light(); - float shade = 2.0f - (light + 12.0f) / 128.0f; - float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); - light += light >> 7; // 255 -> 256 - - // Sampling stuff - uint32_t color = args->uniforms->Color(); - const uint8_t * RESTRICT translation = args->uniforms->Translation(); - const uint8_t * RESTRICT texPixels = args->uniforms->TexturePixels(); - uint32_t texWidth = args->uniforms->TextureWidth(); - uint32_t texHeight = args->uniforms->TextureHeight(); - - if (mask0 == 0xffffffff && mask1 == 0xffffffff) - { - for (int y = 0; y < 8; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - for (int ix = 0; ix < 8; ix++) - { - int lightshade = lightpos >> 8; - uint8_t bgcolor = dest[ix]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor; - uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight, destX + ix, destY + y); - if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; - dest[ix] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); - posU += stepU; - posV += stepV; - lightpos += lightstep; - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - - dest += pitch; - } - } - else - { - // mask0 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - for (int x = 0; x < 8; x++) - { - if (mask0 & (1 << 31)) - { - int lightshade = lightpos >> 8; - uint8_t bgcolor = dest[x]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor; - uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + y); - if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; - dest[x] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); - } - - posU += stepU; - posV += stepV; - lightpos += lightstep; - - mask0 <<= 1; - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - - dest += pitch; - } - - // mask1 loop: - for (int y = 0; y < 4; y++) - { - float rcpW = 0x01000000 / blockPosY.W; - int32_t posU = (int32_t)(blockPosY.U * rcpW); - int32_t posV = (int32_t)(blockPosY.V * rcpW); - - fixed_t lightpos = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosY.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - - ScreenTriangleStepVariables blockPosX = blockPosY; - blockPosX.W += gradientX.W; - blockPosX.U += gradientX.U; - blockPosX.V += gradientX.V; - - rcpW = 0x01000000 / blockPosX.W; - int32_t nextU = (int32_t)(blockPosX.U * rcpW); - int32_t nextV = (int32_t)(blockPosX.V * rcpW); - int32_t stepU = (nextU - posU) / 8; - int32_t stepV = (nextV - posV) / 8; - - fixed_t lightnext = FRACUNIT - (fixed_t)(clamp(shade - MIN(24.0f / 32.0f, globVis * blockPosX.W), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightstep = (lightnext - lightpos) / 8; - lightstep = lightstep & lightmask; - - for (int x = 0; x < 8; x++) - { - if (mask1 & (1 << 31)) - { - int lightshade = lightpos >> 8; - uint8_t bgcolor = dest[x]; - if (SamplerT::Mode == (int)Samplers::FogBoundary) color = bgcolor; - uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight, destX + x, destY + 4 + y); - if (SamplerT::Mode == (int)Samplers::Fuzz) lightshade = 256; - dest[x] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); - } - - posU += stepU; - posV += stepV; - lightpos += lightstep; - - mask1 <<= 1; - } - - blockPosY.W += gradientY.W; - blockPosY.U += gradientY.U; - blockPosY.V += gradientY.V; - - dest += pitch; - } - } - } -}; - template class RectScreenDrawer8 { diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 174da12f2..d5c245a5b 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -39,12 +39,19 @@ #include "screen_triangle.h" #include "x86.h" +static bool isBgraRenderTarget = false; + void PolyTriangleDrawer::ClearBuffers(DCanvas *canvas) { PolyStencilBuffer::Instance()->Clear(canvas->GetWidth(), canvas->GetHeight(), 0); PolyZBuffer::Instance()->Resize(canvas->GetPitch(), canvas->GetHeight()); } +bool PolyTriangleDrawer::IsBgra() +{ + return isBgraRenderTarget; +} + void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int y, int width, int height, DCanvas *canvas, bool span_drawers) { uint8_t *dest = (uint8_t*)canvas->GetBuffer(); @@ -52,6 +59,7 @@ void PolyTriangleDrawer::SetViewport(const DrawerCommandQueuePtr &queue, int x, int dest_height = canvas->GetHeight(); int dest_pitch = canvas->GetPitch(); bool dest_bgra = canvas->IsBgra(); + isBgraRenderTarget = dest_bgra; int offsetx = clamp(x, 0, dest_width); int offsety = clamp(y, 0, dest_height); diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index 628bdb009..b10888455 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -38,6 +38,8 @@ public: static void SetTwoSided(const DrawerCommandQueuePtr &queue, bool twosided); static void SetWeaponScene(const DrawerCommandQueuePtr &queue, bool enable); static void SetTransform(const DrawerCommandQueuePtr &queue, const Mat4f *objectToClip); + + static bool IsBgra(); }; class PolyTriangleThreadData diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index 9241d207f..a8a4fcd54 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -307,7 +307,7 @@ void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1) bool writeDepth = args->uniforms->WriteDepth(); int bmode = (int)args->uniforms->BlendMode(); - auto drawFunc = args->destBgra ? ScreenTriangle::TriDrawers32[bmode] : ScreenTriangle::TriDrawers8[bmode]; + auto drawFunc = args->destBgra ? ScreenTriangle::SpanDrawers32[bmode] : ScreenTriangle::SpanDrawers8[bmode]; // Loop through blocks for (int y = start_miny; y < y1; y += q * num_cores) @@ -345,7 +345,66 @@ void TriangleBlock::RenderBlock(int x0, int y0, int x1, int y1) } if (writeColor) - drawFunc(X, Y, Mask0, Mask1, args); + { + if (Mask0 == 0xffffffff) + { + drawFunc(Y, X, X + 8, args); + drawFunc(Y + 1, X, X + 8, args); + drawFunc(Y + 2, X, X + 8, args); + drawFunc(Y + 3, X, X + 8, args); + } + else if (Mask0 != 0) + { + uint32_t mask = Mask0; + for (int j = 0; j < 4; j++) + { + int start = 0; + int i; + for (i = 0; i < 8; i++) + { + if (!(mask & 0x80000000)) + { + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + start = i + 1; + } + mask <<= 1; + } + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + } + } + + if (Mask1 == 0xffffffff) + { + drawFunc(Y + 4, X, X + 8, args); + drawFunc(Y + 5, X, X + 8, args); + drawFunc(Y + 6, X, X + 8, args); + drawFunc(Y + 7, X, X + 8, args); + } + else if (Mask1 != 0) + { + uint32_t mask = Mask1; + for (int j = 4; j < 8; j++) + { + int start = 0; + int i; + for (i = 0; i < 8; i++) + { + if (!(mask & 0x80000000)) + { + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + start = i + 1; + } + mask <<= 1; + } + if (i > start) + drawFunc(Y + j, X + start, X + i, args); + } + } + } + if (writeStencil) StencilWrite(); if (writeDepth) @@ -1249,210 +1308,102 @@ void ScreenTriangle::DrawSWRender(const TriDrawTriangleArgs *args, PolyTriangleT } } -template -void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) +template +void DrawSpanOpt32(int y, int x0, int x1, const TriDrawTriangleArgs *args) { using namespace TriScreenDrawerModes; - float v1X = args->v1->x; - float v1Y = args->v1->y; - float v1W = args->v1->w; - float v1U = args->v1->u * v1W; - float v1V = args->v1->v * v1W; - float stepXW = args->gradientX.W; - float stepXU = args->gradientX.U; - float stepXV = args->gradientX.V; - float startX = x0 + (0.5f - v1X); - float startY = y + (0.5f - v1Y); - float posXW = v1W + stepXW * startX + args->gradientY.W * startY; - float posXU = v1U + stepXU * startX + args->gradientY.U * startY; - float posXV = v1V + stepXV * startX + args->gradientY.V * startY; + float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ; + float startX, startY; + float stepXW, stepXU, stepXV, stepWorldX, stepWorldY, stepWorldZ; + float posXW, posXU, posXV, posWorldX, posWorldY, posWorldZ; - const uint32_t *texPixels = (const uint32_t*)args->uniforms->TexturePixels(); - const uint32_t *translation = (const uint32_t*)args->uniforms->Translation(); - int texWidth = args->uniforms->TextureWidth(); - int texHeight = args->uniforms->TextureHeight(); + PolyLight *lights; + int num_lights; + float worldnormalX, worldnormalY, worldnormalZ; + uint32_t dynlightcolor; + const uint32_t *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor; + int alpha; + uint32_t light; + fixed_t shade, lightpos, lightstep; + uint32_t shade_fade_r, shade_fade_g, shade_fade_b, shade_light_r, shade_light_g, shade_light_b, desaturate, inv_desaturate; - int fillcolor = args->uniforms->Color(); - int alpha = args->uniforms->SrcAlpha(); + v1X = args->v1->x; + v1Y = args->v1->y; + v1W = args->v1->w; + v1U = args->v1->u * v1W; + v1V = args->v1->v * v1W; + startX = x0 + (0.5f - v1X); + startY = y + (0.5f - v1Y); + stepXW = args->gradientX.W; + stepXU = args->gradientX.U; + stepXV = args->gradientX.V; + posXW = v1W + stepXW * startX + args->gradientY.W * startY; + posXU = v1U + stepXU * startX + args->gradientY.U * startY; + posXV = v1V + stepXV * startX + args->gradientY.V * startY; - bool is_fixed_light = args->uniforms->FixedLight(); - uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - uint32_t light = args->uniforms->Light(); - float shade = 2.0f - (light + 12.0f) / 128.0f; - float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); - light += light >> 7; // 255 -> 256 + texPixels = (const uint32_t*)args->uniforms->TexturePixels(); + translation = (const uint32_t*)args->uniforms->Translation(); + texWidth = args->uniforms->TextureWidth(); + texHeight = args->uniforms->TextureHeight(); + fillcolor = args->uniforms->Color(); + alpha = args->uniforms->Alpha(); + light = args->uniforms->Light(); + + if (OptT::Flags & SWOPT_FixedLight) + { + light += light >> 7; // 255 -> 256 + } + else + { + float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); + + shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT); + lightpos = (fixed_t)(globVis * posXW * (float)FRACUNIT); + lightstep = (fixed_t)(globVis * stepXW * (float)FRACUNIT); + } + + if (OptT::Flags & SWOPT_DynLights) + { + v1WorldX = args->v1->worldX * v1W; + v1WorldY = args->v1->worldY * v1W; + v1WorldZ = args->v1->worldZ * v1W; + stepWorldX = args->gradientX.WorldX; + stepWorldY = args->gradientX.WorldY; + stepWorldZ = args->gradientX.WorldZ; + posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY; + posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY; + posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY; + + lights = args->uniforms->Lights(); + num_lights = args->uniforms->NumLights(); + worldnormalX = args->uniforms->Normal().X; + worldnormalY = args->uniforms->Normal().Y; + worldnormalZ = args->uniforms->Normal().Z; + dynlightcolor = args->uniforms->DynLightColor(); + } + + if (OptT::Flags & SWOPT_ColoredFog) + { + shade_fade_r = args->uniforms->ShadeFadeRed(); + shade_fade_g = args->uniforms->ShadeFadeGreen(); + shade_fade_b = args->uniforms->ShadeFadeBlue(); + shade_light_r = args->uniforms->ShadeLightRed(); + shade_light_g = args->uniforms->ShadeLightGreen(); + shade_light_b = args->uniforms->ShadeLightBlue(); + desaturate = args->uniforms->ShadeDesaturate(); + inv_desaturate = 256 - desaturate; + } uint32_t *dest = (uint32_t*)args->dest; uint32_t *destLine = dest + args->pitch * y; int x = x0; - -#ifndef NO_SSE - __m128i mfillcolor = _mm_set1_epi32(fillcolor); - __m128i mcapcolor = _mm_unpacklo_epi8(mfillcolor, _mm_setzero_si128()); - __m128i malpha = _mm_set1_epi32(alpha); - - int sseEnd = x0 + ((x1 - x0) & ~3); - while (x < sseEnd) - { - __m128i fg; - - if (ModeT::SWFlags & SWSTYLEF_Fill) - { - fg = mfillcolor; - } - else if (ModeT::SWFlags & SWSTYLEF_FogBoundary) - { - fg = _mm_loadl_epi64((const __m128i*)(destLine + x)); - } - else - { - float rcpW0 = 0x01000000 / posXW; - float rcpW1 = 0x01000000 / (posXW + stepXW); - - int32_t u0 = (int32_t)(posXU * rcpW0); - int32_t u1 = (int32_t)((posXU + stepXU) * rcpW1); - int32_t v0 = (int32_t)(posXV * rcpW0); - int32_t v1 = (int32_t)((posXV + stepXV) * rcpW1); - uint32_t texelX0 = ((((uint32_t)u0 << 8) >> 16) * texWidth) >> 16; - uint32_t texelX1 = ((((uint32_t)u1 << 8) >> 16) * texWidth) >> 16; - uint32_t texelY0 = ((((uint32_t)v0 << 8) >> 16) * texHeight) >> 16; - uint32_t texelY1 = ((((uint32_t)v1 << 8) >> 16) * texHeight) >> 16; - - if (ModeT::SWFlags & SWSTYLEF_Translated) - { - uint32_t fg0 = translation[((const uint8_t*)texPixels)[texelX0 * texHeight + texelY0]]; - uint32_t fg1 = translation[((const uint8_t*)texPixels)[texelX1 * texHeight + texelY1]]; - fg = _mm_setr_epi32(fg0, fg1, 0, 0); - } - else - { - uint32_t fg0 = texPixels[texelX0 * texHeight + texelY0]; - uint32_t fg1 = texPixels[texelX1 * texHeight + texelY1]; - fg = _mm_setr_epi32(fg0, fg1, 0, 0); - } - } - - if (ModeT::SWFlags & SWSTYLEF_Skycap) - { - float rcpW0 = 0x01000000 / posXW; - float rcpW1 = 0x01000000 / (posXW + stepXW); - int32_t v0 = (int32_t)(posXV * rcpW0); - int32_t v1 = (int32_t)((posXV + stepXV) * rcpW1); - - int start_fade = 2; // How fast it should fade out - __m128i v = _mm_setr_epi32(v0, v0, v1, v1); - __m128i alpha_top = _mm_min_epi16(_mm_max_epi16(_mm_srai_epi32(v, 16 - start_fade), _mm_setzero_si128()), _mm_set1_epi16(256)); - __m128i alpha_bottom = _mm_min_epi16(_mm_max_epi16(_mm_srai_epi32(_mm_sub_epi32(_mm_set1_epi32(2 << 24), v), 16 - start_fade), _mm_setzero_si128()), _mm_set1_epi16(256)); - __m128i a = _mm_min_epi16(alpha_top, alpha_bottom); - a = _mm_shufflelo_epi16(_mm_shufflehi_epi16(a, _MM_SHUFFLE(0, 0, 0, 0)), _MM_SHUFFLE(0, 0, 0, 0)); - __m128i inv_a = _mm_sub_epi32(_mm_set1_epi32(256), a); - - fg = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - __m128i c = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(fg, a), _mm_mullo_epi16(mcapcolor, inv_a)), _mm_set1_epi16(127)), 8); - _mm_storel_epi64((__m128i*)(destLine + x), _mm_packus_epi16(c, c)); - } - else - { - if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) - { - __m128i rgbmask = _mm_set1_epi32(0x00ffffff); - if (ModeT::Flags & STYLEF_RedIsAlpha) - fg = _mm_or_si128(_mm_andnot_si128(rgbmask, _mm_slli_epi32(fg, 8)), _mm_and_si128(rgbmask, mfillcolor)); - else - fg = _mm_or_si128(_mm_andnot_si128(rgbmask, fg), _mm_and_si128(rgbmask, mfillcolor)); - } - - if (!(ModeT::Flags & STYLEF_Alpha1)) - { - __m128i a = _mm_srli_epi32(fg, 24); - a = _mm_srli_epi32(_mm_mullo_epi16(a, malpha), 8); - fg = _mm_or_si128(_mm_and_si128(fg, _mm_set1_epi32(0x00ffffff)), _mm_slli_epi32(a, 24)); - } - - fg = _mm_unpacklo_epi8(fg, _mm_setzero_si128()); - - fixed_t lightpos0 = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - fixed_t lightpos1 = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * (posXW + stepXW)), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos0 = (lightpos0 & lightmask) | ((light << 8) & ~lightmask); - lightpos1 = (lightpos1 & lightmask) | ((light << 8) & ~lightmask); - int lightshade0 = lightpos0 >> 8; - int lightshade1 = lightpos1 >> 8; - __m128i shadedfg = _mm_srli_epi16(_mm_mullo_epi16(fg, _mm_setr_epi16(lightshade0, lightshade0, lightshade0, 256, lightshade1, lightshade1, lightshade1, 256)), 8); - - __m128i out; - if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) - { - out = shadedfg; - } - else if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_One) - { - __m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128()); - if (ModeT::BlendOp == STYLEOP_Add) - { - out = _mm_add_epi16(dest, shadedfg); - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - out = _mm_sub_epi16(dest, shadedfg); - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - out = _mm_sub_epi16(shadedfg, dest); - } - } - else if (ModeT::SWFlags & SWSTYLEF_SrcColorOneMinusSrcColor) - { - __m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128()); - __m128i sfactor = _mm_add_epi16(shadedfg, _mm_srli_epi16(shadedfg, 7)); - __m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor); - out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(dest, dfactor), _mm_mullo_epi16(shadedfg, sfactor)), _mm_set1_epi16(127)), 8); - } - else - { - __m128i dest = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i*)(destLine + x)), _mm_setzero_si128()); - - __m128i sfactor = _mm_shufflehi_epi16(_mm_shufflelo_epi16(shadedfg, _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); - sfactor = _mm_add_epi16(sfactor, _mm_srli_epi16(sfactor, 7)); // 255 -> 256 - __m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor); - __m128i src = _mm_mullo_epi16(shadedfg, sfactor); - if (ModeT::BlendDest == STYLEALPHA_One) - { - dest = _mm_slli_epi16(dest, 8); - } - else - { - __m128i dfactor = _mm_sub_epi16(_mm_set1_epi16(256), sfactor); - dest = _mm_mullo_epi16(dest, dfactor); - } - - if (ModeT::BlendOp == STYLEOP_Add) - { - out = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(dest, src), _mm_set1_epi16(127)), 8); - } - else if (ModeT::BlendOp == STYLEOP_RevSub) - { - out = _mm_srli_epi16(_mm_add_epi16(_mm_sub_epi16(dest, src), _mm_set1_epi16(127)), 8); - } - else //if (ModeT::BlendOp == STYLEOP_Sub) - { - out = _mm_srli_epi16(_mm_add_epi16(_mm_sub_epi16(src, dest), _mm_set1_epi16(127)), 8); - } - } - _mm_storel_epi64((__m128i*)(destLine + x), _mm_or_si128(_mm_packus_epi16(out, out), _mm_set1_epi32(0xff000000))); - } - - posXW += stepXW + stepXW; - posXU += stepXU + stepXU; - posXV += stepXV + stepXV; - x += 2; - } -#endif - while (x < x1) { - uint32_t fg; + uint32_t fg = 0; if (ModeT::SWFlags & SWSTYLEF_Fill) { @@ -1462,7 +1413,7 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) { fg = destLine[x]; } - else + else if (ModeT::BlendOp != STYLEOP_Fuzz) { float rcpW = 0x01000000 / posXW; int32_t u = (int32_t)(posXU * rcpW); @@ -1474,13 +1425,48 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) { fg = translation[((const uint8_t*)texPixels)[texelX * texHeight + texelY]]; } + else if (ModeT::Flags & STYLEF_RedIsAlpha) + { + fg = ((const uint8_t*)texPixels)[texelX * texHeight + texelY]; + } else { fg = texPixels[texelX * texHeight + texelY]; } } - if (ModeT::SWFlags & SWSTYLEF_Skycap) + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + float rcpW = 0x01000000 / posXW; + int32_t u = (int32_t)(posXU * rcpW); + int32_t v = (int32_t)(posXV * rcpW); + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = destLine[x]; + uint32_t out_r = (RPART(dest) * a) >> 8; + uint32_t out_g = (GPART(dest) * a) >> 8; + uint32_t out_b = (BPART(dest) * a) >> 8; + destLine[x] = MAKEARGB(255, out_r, out_g, out_b); + } + else if (ModeT::SWFlags & SWSTYLEF_Skycap) { float rcpW = 0x01000000 / posXW; int32_t v = (int32_t)(posXV * rcpW); @@ -1516,7 +1502,7 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) if ((ModeT::Flags & STYLEF_ColorIsFixed) && !(ModeT::SWFlags & SWSTYLEF_Fill)) { if (ModeT::Flags & STYLEF_RedIsAlpha) - fg = ((fg << 8) & 0xff000000) | (fillcolor & 0x00ffffff); + fg = (fg << 24) | (fillcolor & 0x00ffffff); else fg = (fg & 0xff000000) | (fillcolor & 0x00ffffff); } @@ -1528,12 +1514,105 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) fgalpha = (fgalpha * alpha) >> 8; } - fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - int lightshade = lightpos >> 8; - uint32_t shadedfg_r = (RPART(fg) * lightshade) >> 8; - uint32_t shadedfg_g = (GPART(fg) * lightshade) >> 8; - uint32_t shadedfg_b = (BPART(fg) * lightshade) >> 8; + int lightshade; + if (OptT::Flags & SWOPT_FixedLight) + { + lightshade = light; + } + else + { + fixed_t maxvis = 24 * FRACUNIT / 32; + fixed_t maxlight = 31 * FRACUNIT / 32; + lightshade = (FRACUNIT - clamp(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8; + } + + uint32_t lit_r = 0, lit_g = 0, lit_b = 0; + if (OptT::Flags & SWOPT_DynLights) + { + lit_r = RPART(dynlightcolor); + lit_g = GPART(dynlightcolor); + lit_b = BPART(dynlightcolor); + + float rcp_posXW = 1.0f / posXW; + float worldposX = posWorldX * rcp_posXW; + float worldposY = posWorldY * rcp_posXW; + float worldposZ = posWorldZ * rcp_posXW; + for (int i = 0; i < num_lights; i++) + { + float lightposX = lights[i].x; + float lightposY = lights[i].y; + float lightposZ = lights[i].z; + float light_radius = lights[i].radius; + uint32_t light_color = lights[i].color; + + bool is_attenuated = light_radius < 0.0f; + if (is_attenuated) + light_radius = -light_radius; + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + float Lx = lightposX - worldposX; + float Ly = lightposY - worldposY; + float Lz = lightposZ - worldposZ; + float dist2 = Lx * Lx + Ly * Ly + Lz * Lz; +#ifdef NO_SSE + //float rcp_dist = 1.0f / sqrt(dist2); + float rcp_dist = 1.0f / (dist2 * 0.01f); +#else + float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2))); +#endif + float dist = dist2 * rcp_dist; + float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f); + + // The simple light type + float simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = max(dot(N,normalize(L)),0) * attenuation + Lx *= rcp_dist; + Ly *= rcp_dist; + Lz *= rcp_dist; + float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz; + float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation; + + uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation); + + lit_r += (RPART(light_color) * attenuation) >> 8; + lit_g += (GPART(light_color) * attenuation) >> 8; + lit_b += (BPART(light_color) * attenuation) >> 8; + } + } + + uint32_t shadedfg_r, shadedfg_g, shadedfg_b; + if (OptT::Flags & SWOPT_ColoredFog) + { + uint32_t fg_r = RPART(fg); + uint32_t fg_g = GPART(fg); + uint32_t fg_b = BPART(fg); + uint32_t intensity = ((fg_r * 77 + fg_g * 143 + fg_b * 37) >> 8) * desaturate; + shadedfg_r = (((shade_fade_r + ((fg_r * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_r) >> 8; + shadedfg_g = (((shade_fade_g + ((fg_g * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_g) >> 8; + shadedfg_b = (((shade_fade_b + ((fg_b * inv_desaturate + intensity) >> 8) * lightshade) >> 8) * shade_light_b) >> 8; + + lit_r = MIN(lit_r, (uint32_t)256); + lit_g = MIN(lit_g, (uint32_t)256); + lit_b = MIN(lit_b, (uint32_t)256); + + shadedfg_r = MIN(shadedfg_r + ((fg_r * lit_r) >> 8), (uint32_t)255); + shadedfg_g = MIN(shadedfg_g + ((fg_g * lit_g) >> 8), (uint32_t)255); + shadedfg_b = MIN(shadedfg_b + ((fg_b * lit_b) >> 8), (uint32_t)255); + } + else + { + lit_r = MIN(lightshade + lit_r, (uint32_t)256); + lit_g = MIN(lightshade + lit_g, (uint32_t)256); + lit_b = MIN(lightshade + lit_b, (uint32_t)256); + + shadedfg_r = (RPART(fg) * lit_r) >> 8; + shadedfg_g = (GPART(fg) * lit_g) >> 8; + shadedfg_b = (BPART(fg) * lit_b) >> 8; + } if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) { @@ -1582,16 +1661,15 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) destLine[x] = MAKEARGB(255, out_r, out_g, out_b); } - else if (fgalpha == 255) + else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) { destLine[x] = MAKEARGB(255, shadedfg_r, shadedfg_g, shadedfg_b); } - else if (fgalpha != 0) + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) { uint32_t dest = destLine[x]; uint32_t sfactor = fgalpha; sfactor += sfactor >> 7; // 255 -> 256 - uint32_t dfactor = 256 - sfactor; uint32_t src_r = shadedfg_r * sfactor; uint32_t src_g = shadedfg_g * sfactor; uint32_t src_b = shadedfg_b * sfactor; @@ -1648,49 +1726,139 @@ void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) posXW += stepXW; posXU += stepXU; posXV += stepXV; + if (OptT::Flags & SWOPT_DynLights) + { + posWorldX += stepWorldX; + posWorldY += stepWorldY; + posWorldZ += stepWorldZ; + } + if (!(OptT::Flags & SWOPT_FixedLight)) + lightpos += lightstep; x++; } } template -void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) +void DrawSpan32(int y, int x0, int x1, const TriDrawTriangleArgs *args) { using namespace TriScreenDrawerModes; - float v1X = args->v1->x; - float v1Y = args->v1->y; - float v1W = args->v1->w; - float v1U = args->v1->u * v1W; - float v1V = args->v1->v * v1W; - float stepXW = args->gradientX.W; - float stepXU = args->gradientX.U; - float stepXV = args->gradientX.V; - float startX = x0 + (0.5f - v1X); - float startY = y + (0.5f - v1Y); - float posXW = v1W + stepXW * startX + args->gradientY.W * startY; - float posXU = v1U + stepXU * startX + args->gradientY.U * startY; - float posXV = v1V + stepXV * startX + args->gradientY.V * startY; + if (args->uniforms->NumLights() == 0) + { + if (!args->uniforms->FixedLight()) + { + if (args->uniforms->SimpleShade()) + DrawSpanOpt32(y, x0, x1, args); + else + DrawSpanOpt32(y, x0, x1, args); + } + else + { + if (args->uniforms->SimpleShade()) + DrawSpanOpt32(y, x0, x1, args); + else + DrawSpanOpt32(y, x0, x1, args); + } + } + else + { + if (!args->uniforms->FixedLight()) + { + if (args->uniforms->SimpleShade()) + DrawSpanOpt32(y, x0, x1, args); + else + DrawSpanOpt32(y, x0, x1, args); + } + else + { + if (args->uniforms->SimpleShade()) + DrawSpanOpt32(y, x0, x1, args); + else + DrawSpanOpt32(y, x0, x1, args); + } + } +} - auto colormaps = args->uniforms->BaseColormap(); +template +void DrawSpanOpt8(int y, int x0, int x1, const TriDrawTriangleArgs *args) +{ + using namespace TriScreenDrawerModes; - const uint8_t *texPixels = args->uniforms->TexturePixels(); - const uint8_t *translation = args->uniforms->Translation(); - int texWidth = args->uniforms->TextureWidth(); - int texHeight = args->uniforms->TextureHeight(); + float v1X, v1Y, v1W, v1U, v1V, v1WorldX, v1WorldY, v1WorldZ; + float startX, startY; + float stepXW, stepXU, stepXV, stepWorldX, stepWorldY, stepWorldZ; + float posXW, posXU, posXV, posWorldX, posWorldY, posWorldZ; - int fillcolor = args->uniforms->Color(); - int alpha = args->uniforms->SrcAlpha(); + PolyLight *lights; + int num_lights; + float worldnormalX, worldnormalY, worldnormalZ; + uint32_t dynlightcolor; + const uint8_t *colormaps, *texPixels, *translation; + int texWidth, texHeight; + uint32_t fillcolor, capcolor; + int alpha; + uint32_t light; + fixed_t shade, lightpos, lightstep; + + v1X = args->v1->x; + v1Y = args->v1->y; + v1W = args->v1->w; + v1U = args->v1->u * v1W; + v1V = args->v1->v * v1W; + startX = x0 + (0.5f - v1X); + startY = y + (0.5f - v1Y); + stepXW = args->gradientX.W; + stepXU = args->gradientX.U; + stepXV = args->gradientX.V; + posXW = v1W + stepXW * startX + args->gradientY.W * startY; + posXU = v1U + stepXU * startX + args->gradientY.U * startY; + posXV = v1V + stepXV * startX + args->gradientY.V * startY; + + texPixels = args->uniforms->TexturePixels(); + translation = args->uniforms->Translation(); + texWidth = args->uniforms->TextureWidth(); + texHeight = args->uniforms->TextureHeight(); + fillcolor = args->uniforms->Color(); + alpha = args->uniforms->Alpha(); + colormaps = args->uniforms->BaseColormap(); + light = args->uniforms->Light(); - uint32_t capcolor = fillcolor; if (ModeT::SWFlags & SWSTYLEF_Skycap) - capcolor = GPalette.BaseColors[capcolor].d; + capcolor = GPalette.BaseColors[fillcolor].d; - bool is_fixed_light = args->uniforms->FixedLight(); - uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - uint32_t light = args->uniforms->Light(); - float shade = 2.0f - (light + 12.0f) / 128.0f; - float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); - light += light >> 7; // 255 -> 256 + if (OptT::Flags & SWOPT_FixedLight) + { + light += light >> 7; // 255 -> 256 + light = ((256 - light) * NUMCOLORMAPS) & 0xffffff00; + } + else + { + float globVis = args->uniforms->GlobVis() * (1.0f / 32.0f); + + shade = (fixed_t)((2.0f - (light + 12.0f) / 128.0f) * (float)FRACUNIT); + lightpos = (fixed_t)(globVis * posXW * (float)FRACUNIT); + lightstep = (fixed_t)(globVis * stepXW * (float)FRACUNIT); + } + + if (OptT::Flags & SWOPT_DynLights) + { + v1WorldX = args->v1->worldX * v1W; + v1WorldY = args->v1->worldY * v1W; + v1WorldZ = args->v1->worldZ * v1W; + stepWorldX = args->gradientX.WorldX; + stepWorldY = args->gradientX.WorldY; + stepWorldZ = args->gradientX.WorldZ; + posWorldX = v1WorldX + stepWorldX * startX + args->gradientY.WorldX * startY; + posWorldY = v1WorldY + stepWorldY * startX + args->gradientY.WorldY * startY; + posWorldZ = v1WorldZ + stepWorldZ * startX + args->gradientY.WorldZ * startY; + + lights = args->uniforms->Lights(); + num_lights = args->uniforms->NumLights(); + worldnormalX = args->uniforms->Normal().X; + worldnormalY = args->uniforms->Normal().Y; + worldnormalZ = args->uniforms->Normal().Z; + dynlightcolor = args->uniforms->DynLightColor(); + } uint8_t *dest = (uint8_t*)args->dest; uint8_t *destLine = dest + args->pitch * y; @@ -1698,7 +1866,7 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) int x = x0; while (x < x1) { - int fg; + int fg = 0; int fgalpha = 255; if (ModeT::SWFlags & SWSTYLEF_Fill) @@ -1709,7 +1877,7 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) { fg = destLine[x]; } - else + else if (ModeT::BlendOp != STYLEOP_Fuzz) { float rcpW = 0x01000000 / posXW; int32_t u = (int32_t)(posXU * rcpW); @@ -1724,7 +1892,37 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) fgalpha = (fg != 0) ? 255 : 0; } - if (ModeT::SWFlags & SWSTYLEF_Skycap) + if (ModeT::BlendOp == STYLEOP_Fuzz) + { + using namespace swrenderer; + + float rcpW = 0x01000000 / posXW; + int32_t u = (int32_t)(posXU * rcpW); + int32_t v = (int32_t)(posXV * rcpW); + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = (texPixels[texelX * texHeight + texelY] != 0) ? 256 : 0; + + fixed_t fuzzscale = (200 << FRACBITS) / viewheight; + + int scaled_x = (x * fuzzscale) >> FRACBITS; + int fuzz_x = fuzz_random_x_offset[scaled_x % FUZZ_RANDOM_X_SIZE] + fuzzpos; + + fixed_t fuzzcount = FUZZTABLE << FRACBITS; + fixed_t fuzz = ((fuzz_x << FRACBITS) + y * fuzzscale) % fuzzcount; + unsigned int alpha = fuzzoffset[fuzz >> FRACBITS]; + + sampleshadeout = (sampleshadeout * alpha) >> 5; + + uint32_t a = 256 - sampleshadeout; + + uint32_t dest = GPalette.BaseColors[destLine[x]].d; + uint32_t r = (RPART(dest) * a) >> 8; + uint32_t g = (GPART(dest) * a) >> 8; + uint32_t b = (BPART(dest) * a) >> 8; + destLine[x] = RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; + } + else if (ModeT::SWFlags & SWSTYLEF_Skycap) { float rcpW = 0x01000000 / posXW; int32_t v = (int32_t)(posXV * rcpW); @@ -1771,11 +1969,95 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) fgalpha = (fgalpha * alpha) >> 8; } - fixed_t lightpos = FRACUNIT - (int)(clamp(shade - MIN(24.0f / 32.0f, globVis * posXW), 0.0f, 31.0f / 32.0f) * (float)FRACUNIT); - lightpos = (lightpos & lightmask) | ((light << 8) & ~lightmask); - int lightshade = lightpos >> 8; - lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; - uint8_t shadedfg = colormaps[lightshade + fg]; + uint8_t shadedfg; + if (OptT::Flags & SWOPT_FixedLight) + { + shadedfg = colormaps[light + fg]; + } + else + { + fixed_t maxvis = 24 * FRACUNIT / 32; + fixed_t maxlight = 31 * FRACUNIT / 32; + int lightshade = (FRACUNIT - clamp(shade - MIN(maxvis, lightpos), 0, maxlight)) >> 8; + lightshade = ((256 - lightshade) << 5) & 0xffffff00; + shadedfg = colormaps[lightshade + fg]; + } + + if (OptT::Flags & SWOPT_DynLights) + { + uint32_t lit_r = RPART(dynlightcolor); + uint32_t lit_g = GPART(dynlightcolor); + uint32_t lit_b = BPART(dynlightcolor); + +#ifdef NO_SSE + float rcp_posXW = 1.0f / posXW; +#else + float rcp_posXW = _mm_cvtss_f32(_mm_rcp_ss(_mm_set_ss(posXW))); +#endif + float worldposX = posWorldX * rcp_posXW; + float worldposY = posWorldY * rcp_posXW; + float worldposZ = posWorldZ * rcp_posXW; + for (int i = 0; i < num_lights; i++) + { + float lightposX = lights[i].x; + float lightposY = lights[i].y; + float lightposZ = lights[i].z; + float light_radius = lights[i].radius; + uint32_t light_color = lights[i].color; + + bool is_attenuated = light_radius < 0.0f; + if (is_attenuated) + light_radius = -light_radius; + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + float Lx = lightposX - worldposX; + float Ly = lightposY - worldposY; + float Lz = lightposZ - worldposZ; + float dist2 = Lx * Lx + Ly * Ly + Lz * Lz; +#ifdef NO_SSE + //float rcp_dist = 1.0f / sqrt(dist2); + float rcp_dist = 1.0f / (dist2 * 0.01f); +#else + float rcp_dist = _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(dist2))); +#endif + float dist = dist2 * rcp_dist; + float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f); + + // The simple light type + float simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = max(dot(N,normalize(L)),0) * attenuation + Lx *= rcp_dist; + Ly *= rcp_dist; + Lz *= rcp_dist; + float dotNL = worldnormalX * Lx + worldnormalY * Ly + worldnormalZ * Lz; + float point_attenuation = MAX(dotNL, 0.0f) * distance_attenuation; + + uint32_t attenuation = (uint32_t)(is_attenuated ? (int32_t)point_attenuation : (int32_t)simple_attenuation); + + lit_r += (RPART(light_color) * attenuation) >> 8; + lit_g += (GPART(light_color) * attenuation) >> 8; + lit_b += (BPART(light_color) * attenuation) >> 8; + } + + if (lit_r || lit_g || lit_b) + { + lit_r = MIN(lit_r, (uint32_t)256); + lit_g = MIN(lit_g, (uint32_t)256); + lit_b = MIN(lit_b, (uint32_t)256); + + uint32_t fgrgb = GPalette.BaseColors[fg]; + uint32_t shadedfgrgb = GPalette.BaseColors[shadedfg]; + + uint32_t out_r = MIN(((RPART(fgrgb) * lit_r) >> 8) + RPART(shadedfgrgb), (uint32_t)255); + uint32_t out_g = MIN(((GPART(fgrgb) * lit_g) >> 8) + GPART(shadedfgrgb), (uint32_t)255); + uint32_t out_b = MIN(((BPART(fgrgb) * lit_b) >> 8) + BPART(shadedfgrgb), (uint32_t)255); + shadedfg = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; + } + } if (ModeT::BlendSrc == STYLEALPHA_One && ModeT::BlendDest == STYLEALPHA_Zero) { @@ -1826,11 +2108,11 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) destLine[x] = RGB256k.All[((out_r >> 2) << 12) | ((out_g >> 2) << 6) | (out_b >> 2)]; } - else if (fgalpha == 255) + else if (ModeT::BlendSrc == STYLEALPHA_Src && ModeT::BlendDest == STYLEALPHA_InvSrc && fgalpha == 255) { destLine[x] = shadedfg; } - else if (fgalpha != 0) + else if (ModeT::BlendSrc != STYLEALPHA_Src || ModeT::BlendDest != STYLEALPHA_InvSrc || fgalpha != 0) { uint32_t src = GPalette.BaseColors[shadedfg]; uint32_t dest = GPalette.BaseColors[destLine[x]]; @@ -1893,10 +2175,39 @@ void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) posXW += stepXW; posXU += stepXU; posXV += stepXV; + if (OptT::Flags & SWOPT_DynLights) + { + posWorldX += stepWorldX; + posWorldY += stepWorldY; + posWorldZ += stepWorldZ; + } + if (!(OptT::Flags & SWOPT_FixedLight)) + lightpos += lightstep; x++; } } +template +void DrawSpan8(int y, int x0, int x1, const TriDrawTriangleArgs *args) +{ + using namespace TriScreenDrawerModes; + + if (args->uniforms->NumLights() == 0) + { + if (!args->uniforms->FixedLight()) + DrawSpanOpt8(y, x0, x1, args); + else + DrawSpanOpt8(y, x0, x1, args); + } + else + { + if (!args->uniforms->FixedLight()) + DrawSpanOpt8(y, x0, x1, args); + else + DrawSpanOpt8(y, x0, x1, args); + } +} + void(*ScreenTriangle::SpanDrawers8[])(int, int, int, const TriDrawTriangleArgs *) = { &DrawSpan8, diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 7aba16c77..036e4a55b 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -251,6 +251,22 @@ namespace TriScreenDrawerModes struct FuzzSampler { static const int Mode = (int)Samplers::Fuzz; }; struct FogBoundarySampler { static const int Mode = (int)Samplers::FogBoundary; }; + enum SWOptFlags + { + SWOPT_DynLights = 1, + SWOPT_ColoredFog = 2, + SWOPT_FixedLight = 4 + }; + + struct DrawerOpt { static const int Flags = 0; }; + struct DrawerOptF { static const int Flags = SWOPT_FixedLight; }; + struct DrawerOptC { static const int Flags = SWOPT_ColoredFog; }; + struct DrawerOptCF { static const int Flags = SWOPT_ColoredFog | SWOPT_FixedLight; }; + struct DrawerOptL { static const int Flags = SWOPT_DynLights; }; + struct DrawerOptLC { static const int Flags = SWOPT_DynLights | SWOPT_ColoredFog; }; + struct DrawerOptLF { static const int Flags = SWOPT_DynLights | SWOPT_FixedLight; }; + struct DrawerOptLCF { static const int Flags = SWOPT_DynLights | SWOPT_ColoredFog | SWOPT_FixedLight; }; + static const int fuzzcolormap[FUZZTABLE] = { 6, 11, 6, 11, 6, 6, 11, 6, 6, 11, diff --git a/src/polyrenderer/scene/poly_model.cpp b/src/polyrenderer/scene/poly_model.cpp index c93360942..344749659 100644 --- a/src/polyrenderer/scene/poly_model.cpp +++ b/src/polyrenderer/scene/poly_model.cpp @@ -56,12 +56,16 @@ void PolyModelRenderer::BeginDrawModel(AActor *actor, FSpriteModelFrame *smf, co ModelActor = actor; const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); SetTransform(); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); } void PolyModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + ModelActor = nullptr; } @@ -100,14 +104,18 @@ void PolyModelRenderer::BeginDrawHUDModel(AActor *actor, const VSMatrix &objectT const_cast(objectToWorldMatrix).copy(ObjectToWorld.Matrix); SetTransform(); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); } void PolyModelRenderer::EndDrawHUDModel(AActor *actor) { ModelActor = nullptr; PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); } void PolyModelRenderer::SetInterpolation(double interpolation) @@ -146,8 +154,7 @@ void PolyModelRenderer::DrawArrays(int start, int count) args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite); args.SetStencilTestValue(StencilValue); args.SetClipPlane(0, PolyClipPlane()); - args.SetStyle(TriBlendMode::Opaque); - args.SetTexture(SkinTexture, DefaultRenderStyle()); + args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite); args.SetDepthTest(true); args.SetWriteDepth(true); args.SetWriteStencil(false); @@ -169,8 +176,7 @@ void PolyModelRenderer::DrawElements(int numIndices, size_t offset) args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, PolyRenderer::Instance()->Light.SpriteGlobVis(foggy), fullbrightSprite); args.SetStencilTestValue(StencilValue); args.SetClipPlane(0, PolyClipPlane()); - args.SetStyle(TriBlendMode::Opaque); - args.SetTexture(SkinTexture, DefaultRenderStyle()); + args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite); args.SetDepthTest(true); args.SetWriteDepth(true); args.SetWriteStencil(false); diff --git a/src/polyrenderer/scene/poly_wall.cpp b/src/polyrenderer/scene/poly_wall.cpp index ceac4a4da..68bc662dc 100644 --- a/src/polyrenderer/scene/poly_wall.cpp +++ b/src/polyrenderer/scene/poly_wall.cpp @@ -354,7 +354,14 @@ void RenderPolyWall::Render(PolyRenderThread *thread) } else { - args.SetStyle(Additive ? TriBlendMode::Add : TriBlendMode::Normal, MIN(Alpha, 1.0)); + double a = MIN(Alpha, 1.0); + if (Additive) + args.SetStyle(TriBlendMode::Add, a); + else if (a < 1.0) + args.SetStyle(TriBlendMode::Translucent, a); + else + args.SetStyle(TriBlendMode::Normal); + args.SetStencilTestValue(StencilValue + 1); args.SetDepthTest(true); args.SetWriteDepth(true); diff --git a/src/swrenderer/things/r_model.cpp b/src/swrenderer/things/r_model.cpp index 1c3349e66..025263e34 100644 --- a/src/swrenderer/things/r_model.cpp +++ b/src/swrenderer/things/r_model.cpp @@ -120,13 +120,17 @@ namespace swrenderer } SetTransform(); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); } void SWModelRenderer::EndDrawModel(AActor *actor, FSpriteModelFrame *smf) { + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal] || !!(smf->flags & MDL_DONTCULLBACKFACES)) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + ModelActor = nullptr; - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); } IModelVertexBuffer *SWModelRenderer::CreateVertexBuffer(bool needindex, bool singleframe) @@ -185,14 +189,18 @@ namespace swrenderer ClipBottom = {}; SetTransform(); PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, true); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, true); } void SWModelRenderer::EndDrawHUDModel(AActor *actor) { ModelActor = nullptr; PolyTriangleDrawer::SetWeaponScene(Thread->DrawQueue, false); - PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); + + if (actor->RenderStyle == LegacyRenderStyles[STYLE_Normal]) + PolyTriangleDrawer::SetTwoSided(Thread->DrawQueue, false); } void SWModelRenderer::SetInterpolation(double interpolation) @@ -229,13 +237,7 @@ namespace swrenderer PolyDrawArgs args; args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite); - args.SetStyle(TriBlendMode::Opaque); - - if (Thread->Viewport->RenderTarget->IsBgra()) - args.SetTexture((const uint8_t *)SkinTexture->GetPixelsBgra(), SkinTexture->GetWidth(), SkinTexture->GetHeight()); - else - args.SetTexture(SkinTexture->GetPixels(DefaultRenderStyle()), SkinTexture->GetWidth(), SkinTexture->GetHeight()); - + args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite); args.SetDepthTest(true); args.SetWriteDepth(true); args.SetWriteStencil(false); @@ -259,13 +261,7 @@ namespace swrenderer PolyDrawArgs args; args.SetLight(GetColorTable(sector->Colormap, sector->SpecialColors[sector_t::sprites], true), lightlevel, Thread->Light->SpriteGlobVis(foggy), fullbrightSprite); - args.SetStyle(TriBlendMode::Opaque); - - if (Thread->Viewport->RenderTarget->IsBgra()) - args.SetTexture((const uint8_t *)SkinTexture->GetPixelsBgra(), SkinTexture->GetWidth(), SkinTexture->GetHeight()); - else - args.SetTexture(SkinTexture->GetPixels(DefaultRenderStyle()), SkinTexture->GetWidth(), SkinTexture->GetHeight()); - + args.SetStyle(ModelActor->RenderStyle, ModelActor->Alpha, ModelActor->fillcolor, ModelActor->Translation, SkinTexture, fullbrightSprite); args.SetDepthTest(true); args.SetWriteDepth(true); args.SetWriteStencil(false);