From e8ba1156afc718356d8a66b8aef8c3035d430e2f Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 28 Mar 2017 16:46:22 +0200 Subject: [PATCH] - add rect drawers for softpoly's player sprites --- src/polyrenderer/drawers/poly_draw_args.cpp | 152 ++++ src/polyrenderer/drawers/poly_draw_args.h | 68 ++ src/polyrenderer/drawers/poly_drawer32_sse2.h | 686 +++++++++++------- src/polyrenderer/drawers/poly_drawer8.h | 445 +++++++----- src/polyrenderer/drawers/poly_triangle.cpp | 20 + src/polyrenderer/drawers/poly_triangle.h | 12 + src/polyrenderer/drawers/screen_triangle.cpp | 72 +- src/polyrenderer/drawers/screen_triangle.h | 8 +- src/polyrenderer/scene/poly_playersprite.cpp | 42 +- 9 files changed, 1015 insertions(+), 490 deletions(-) diff --git a/src/polyrenderer/drawers/poly_draw_args.cpp b/src/polyrenderer/drawers/poly_draw_args.cpp index a9bfde1f55..4c526d1ef7 100644 --- a/src/polyrenderer/drawers/poly_draw_args.cpp +++ b/src/polyrenderer/drawers/poly_draw_args.cpp @@ -199,3 +199,155 @@ void PolyDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint3 SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha); } } + +///////////////////////////////////////////////////////////////////////////// + +void RectDrawArgs::SetTexture(const uint8_t *texels, int width, int height) +{ + mTexturePixels = texels; + mTextureWidth = width; + mTextureHeight = height; + mTranslation = nullptr; +} + +void RectDrawArgs::SetTexture(FTexture *texture) +{ + mTextureWidth = texture->GetWidth(); + mTextureHeight = texture->GetHeight(); + if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + mTexturePixels = (const uint8_t *)texture->GetPixelsBgra(); + else + mTexturePixels = texture->GetPixels(); + mTranslation = nullptr; +} + +void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool forcePal) +{ + if (translationID != 0xffffffff && translationID != 0) + { + FRemapTable *table = TranslationToTable(translationID); + if (table != nullptr && !table->Inactive) + { + if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + mTranslation = (uint8_t*)table->Palette; + else + mTranslation = table->Remap; + + mTextureWidth = texture->GetWidth(); + mTextureHeight = texture->GetHeight(); + mTexturePixels = texture->GetPixels(); + return; + } + } + + if (forcePal) + { + mTextureWidth = texture->GetWidth(); + mTextureHeight = texture->GetHeight(); + mTexturePixels = texture->GetPixels(); + } + else + { + SetTexture(texture); + } +} + +void RectDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel) +{ + mLight = clamp(lightlevel, 0, 255); + mLightRed = base_colormap->Color.r * 256 / 255; + mLightGreen = base_colormap->Color.g * 256 / 255; + mLightBlue = base_colormap->Color.b * 256 / 255; + mLightAlpha = base_colormap->Color.a * 256 / 255; + mFadeRed = base_colormap->Fade.r; + mFadeGreen = base_colormap->Fade.g; + mFadeBlue = base_colormap->Fade.b; + mFadeAlpha = base_colormap->Fade.a; + mDesaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + mSimpleShade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + mColormaps = base_colormap->Maps; +} + +void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex) +{ + if (PolyRenderer::Instance()->RenderTarget->IsBgra()) + { + mColor = bgra; + } + else + { + mColor = palindex; + } +} + +void RectDrawArgs::Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1) +{ + mX0 = (float)x0; + mX1 = (float)x1; + mY0 = (float)y0; + mY1 = (float)y1; + mU0 = (float)u0; + mU1 = (float)u1; + mV0 = (float)v0; + mV1 = (float)v1; + PolyRenderer::Instance()->DrawQueue->Push(*this); +} + +void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright) +{ + bool forcePal = (renderstyle == LegacyRenderStyles[STYLE_Shaded] || renderstyle == LegacyRenderStyles[STYLE_AddShaded]); + SetTexture(tex, translationID, forcePal); + + if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 1.0, 0.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation()) + { + SetStyle(TriBlendMode::TextureAddSrcColor, 1.0, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Add]) + { + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Subtract]) + { + SetStyle(Translation() ? TriBlendMode::TranslatedRevSub : TriBlendMode::TextureRevSub, alpha, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans]) + { + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, transsouls, 1.0 - transsouls); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { // NYI - Fuzzy - for now, just a copy of "Shadow" + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy])) + { + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil]) + { + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(TriBlendMode::Stencil, alpha, 1.0 - alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil]) + { + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(TriBlendMode::AddStencil, alpha, 1.0); + } + else if (renderstyle == LegacyRenderStyles[STYLE_Shaded]) + { + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(TriBlendMode::Shaded, alpha, 1.0 - alpha); + } + else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded]) + { + SetColor(0xff000000 | fillcolor, fillcolor >> 24); + SetStyle(TriBlendMode::AddShaded, alpha, 1.0); + } + else + { + SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha); + } +} diff --git a/src/polyrenderer/drawers/poly_draw_args.h b/src/polyrenderer/drawers/poly_draw_args.h index a77ef7c988..c90aaadd06 100644 --- a/src/polyrenderer/drawers/poly_draw_args.h +++ b/src/polyrenderer/drawers/poly_draw_args.h @@ -149,3 +149,71 @@ private: bool mNearestFilter = true; bool mFixedLight = false; }; + +class RectDrawArgs +{ +public: + void SetTexture(const uint8_t *texels, int width, int height); + void SetTexture(FTexture *texture); + void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false); + void SetLight(FSWColormap *basecolormap, uint32_t lightlevel); + void SetStyle(TriBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); } + void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright); + void SetColor(uint32_t bgra, uint8_t palindex); + void Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1); + + const uint8_t *TexturePixels() const { return mTexturePixels; } + int TextureWidth() const { return mTextureWidth; } + int TextureHeight() const { return mTextureHeight; } + const uint8_t *Translation() const { return mTranslation; } + + TriBlendMode BlendMode() const { return mBlendMode; } + uint32_t Color() const { return mColor; } + uint32_t SrcAlpha() const { return mSrcAlpha; } + uint32_t DestAlpha() const { return mDestAlpha; } + + uint32_t Light() const { return mLight; } + const uint8_t *BaseColormap() const { return mColormaps; } + uint16_t ShadeLightAlpha() const { return mLightAlpha; } + uint16_t ShadeLightRed() const { return mLightRed; } + uint16_t ShadeLightGreen() const { return mLightGreen; } + uint16_t ShadeLightBlue() const { return mLightBlue; } + uint16_t ShadeFadeAlpha() const { return mFadeAlpha; } + uint16_t ShadeFadeRed() const { return mFadeRed; } + uint16_t ShadeFadeGreen() const { return mFadeGreen; } + uint16_t ShadeFadeBlue() const { return mFadeBlue; } + uint16_t ShadeDesaturate() const { return mDesaturate; } + bool SimpleShade() const { return mSimpleShade; } + + float X0() const { return mX0; } + float X1() const { return mX1; } + float Y0() const { return mY0; } + float Y1() const { return mY1; } + float U0() const { return mU0; } + float U1() const { return mU1; } + float V0() const { return mV0; } + float V1() const { return mV1; } + +private: + const uint8_t *mTexturePixels = nullptr; + int mTextureWidth = 0; + int mTextureHeight = 0; + const uint8_t *mTranslation = nullptr; + const uint8_t *mColormaps = nullptr; + TriBlendMode mBlendMode = TriBlendMode::FillOpaque; + uint32_t mLight = 0; + uint32_t mColor = 0; + uint32_t mSrcAlpha = 0; + uint32_t mDestAlpha = 0; + uint16_t mLightAlpha = 0; + uint16_t mLightRed = 0; + uint16_t mLightGreen = 0; + uint16_t mLightBlue = 0; + uint16_t mFadeAlpha = 0; + uint16_t mFadeRed = 0; + uint16_t mFadeGreen = 0; + uint16_t mFadeBlue = 0; + uint16_t mDesaturate = 0; + bool mSimpleShade = true; + float mX0, mX1, mY0, mY1, mU0, mU1, mV0, mV1; +}; diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h index 20d0a9efcd..bb94632bc1 100644 --- a/src/polyrenderer/drawers/poly_drawer32_sse2.h +++ b/src/polyrenderer/drawers/poly_drawer32_sse2.h @@ -24,6 +24,246 @@ #include "screen_triangle.h" +namespace TriScreenDrawerModes +{ + template + FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) + { + uint32_t texel; + if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill) + { + return color; + } + else if (SamplerT::Mode == (int)Samplers::Translated) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return translation[texpal[texelX * texHeight + texelY]]; + } + else if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + texel = texPixels[texelX * texHeight + texelY]; + } + else + { + u -= oneU >> 1; + v -= oneV >> 1; + + unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; + unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth; + unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; + unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; + unsigned int x0 = frac_x0 >> FRACBITS; + unsigned int x1 = frac_x1 >> FRACBITS; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = texPixels[x0 * texHeight + y0]; + unsigned int p01 = texPixels[x0 * texHeight + y1]; + unsigned int p10 = texPixels[x1 * texHeight + y0]; + unsigned int p11 = texPixels[x1 * texHeight + y1]; + + unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; + unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + + if (SamplerT::Mode == (int)Samplers::Skycap) + { + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(v >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + uint32_t r = RPART(texel); + uint32_t g = GPART(texel); + uint32_t b = BPART(texel); + uint32_t fg_a = APART(texel); + uint32_t bg_red = RPART(color); + uint32_t bg_green = GPART(color); + uint32_t bg_blue = BPART(color); + r = (r * a + bg_red * inv_a + 127) >> 8; + g = (g * a + bg_green * inv_a + 127) >> 8; + b = (b * a + bg_blue * inv_a + 127) >> 8; + return MAKEARGB(fg_a, r, g, b); + } + else + { + return texel; + } + } + + template + FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight) + { + if (SamplerT::Mode == (int)Samplers::Shaded) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Stencil) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else + { + return 0; + } + } + + template + FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light) + { + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + } + else + { + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + } + return fgcolor; + } + + template + FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) + { + if (BlendT::Mode == (int)BlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::Masked) + { + __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); + mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); + __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) + { + __m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7))); + __m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8)); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; + ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; + ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); + __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)BlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)BlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi = _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)BlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } +} + template class TriScreenDrawer32 { @@ -66,6 +306,7 @@ public: } } +private: template FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread) { @@ -198,13 +439,13 @@ public: // Sample fgcolor unsigned int ifgcolor[2], ifgshade[2]; - ifgcolor[0] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; - ifgcolor[1] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; @@ -228,8 +469,8 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor); @@ -298,13 +539,13 @@ public: // Sample fgcolor unsigned int ifgcolor[2], ifgshade[2]; - ifgcolor[0] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; - ifgcolor[1] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; @@ -328,8 +569,8 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)desttmp, outcolor); @@ -387,13 +628,13 @@ public: // Sample fgcolor unsigned int ifgcolor[2], ifgshade[2]; - ifgcolor[0] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; - ifgcolor[1] = Sample(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); - ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); posU += stepU; posV += stepV; @@ -417,8 +658,8 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); - fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)desttmp, outcolor); @@ -437,250 +678,6 @@ public: } } -private: - template - FORCEINLINE static unsigned int VECTORCALL Sample(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation) - { - using namespace TriScreenDrawerModes; - - uint32_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texpal[texelX * texHeight + texelY]]; - } - else if (FilterModeT::Mode == (int)FilterModes::Nearest) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - else - { - u -= oneU >> 1; - v -= oneV >> 1; - - unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth; - unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth; - unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight; - unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight; - unsigned int x0 = frac_x0 >> FRACBITS; - unsigned int x1 = frac_x1 >> FRACBITS; - unsigned int y0 = frac_y0 >> FRACBITS; - unsigned int y1 = frac_y1 >> FRACBITS; - - unsigned int p00 = texPixels[x0 * texHeight + y0]; - unsigned int p01 = texPixels[x0 * texHeight + y1]; - unsigned int p10 = texPixels[x1 * texHeight + y0]; - unsigned int p11 = texPixels[x1 * texHeight + y1]; - - unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15; - unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int a = 16 - inv_a; - unsigned int b = 16 - inv_b; - - unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; - unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; - - texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - uint32_t r = RPART(texel); - uint32_t g = GPART(texel); - uint32_t b = BPART(texel); - uint32_t fg_a = APART(texel); - uint32_t bg_red = RPART(color); - uint32_t bg_green = GPART(color); - uint32_t bg_blue = BPART(color); - r = (r * a + bg_red * inv_a + 127) >> 8; - g = (g * a + bg_green * inv_a + 127) >> 8; - b = (b * a + bg_blue * inv_a + 127) >> 8; - return MAKEARGB(fg_a, r, g, b); - } - else - { - return texel; - } - } - - FORCEINLINE static unsigned int VECTORCALL SampleShade(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight) - { - using namespace TriScreenDrawerModes; - - if (SamplerT::Mode == (int)Samplers::Shaded) - { - const uint8_t *texpal = (const uint8_t *)texPixels; - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]); - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else - { - return 0; - } - } - - template - FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light) - { - using namespace TriScreenDrawerModes; - - if (ShadeModeT::Mode == (int)ShadeMode::Simple) - { - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); - } - else - { - int blue0 = BPART(ifgcolor0); - int green0 = GPART(ifgcolor0); - int red0 = RPART(ifgcolor0); - int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; - - int blue1 = BPART(ifgcolor1); - int green1 = GPART(ifgcolor1); - int red1 = RPART(ifgcolor1); - int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; - - __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); - - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, mlight); - fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); - } - return fgcolor; - } - - FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) - { - using namespace TriScreenDrawerModes; - - if (BlendT::Mode == (int)BlendModes::Opaque) - { - __m128i outcolor = fgcolor; - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - __m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128()); - mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128()); - __m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - __m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7))); - __m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8)); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); - - fgcolor = _mm_mullo_epi16(fgcolor, alpha); - bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); - __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; - ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; - __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); - - fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); - __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - else - { - uint32_t alpha0 = APART(ifgcolor0); - uint32_t alpha1 = APART(ifgcolor1); - alpha0 += alpha0 >> 7; // 255->256 - alpha1 += alpha1 >> 7; // 255->256 - uint32_t inv_alpha0 = 256 - alpha0; - uint32_t inv_alpha1 = 256 - alpha1; - - uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; - uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; - uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; - uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; - - __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); - __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); - - fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); - bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); - - __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); - __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); - __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); - - __m128i out_lo, out_hi; - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - out_lo = _mm_add_epi32(fg_lo, bg_lo); - out_hi = _mm_add_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - out_lo = _mm_sub_epi32(fg_lo, bg_lo); - out_hi = _mm_sub_epi32(fg_hi, bg_hi); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - out_lo = _mm_sub_epi32(bg_lo, fg_lo); - out_hi = _mm_sub_epi32(bg_hi, fg_hi); - } - - out_lo = _mm_srai_epi32(out_lo, 8); - out_hi = _mm_srai_epi32(out_hi, 8); - __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); - outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); - outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); - return outcolor; - } - } - static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -695,3 +692,168 @@ private: return top / bottom; } }; + +template +class RectScreenDrawer32 +{ +public: + static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread) + { + using namespace TriScreenDrawerModes; + + if (args->SimpleShade()) + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + else + { + Loop(destOrg, destWidth, destHeight, destPitch, args, thread); + } + } + +private: + template + FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread) + { + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + uint32_t srcalpha = args->SrcAlpha(); + uint32_t destalpha = args->DestAlpha(); + + // Setup step variables + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + // Sampling stuff + uint32_t color = args->Color(); + const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation(); + const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels(); + uint32_t texWidth = args->TextureWidth(); + uint32_t texHeight = args->TextureHeight(); + uint32_t oneU, oneV; + if (SamplerT::Mode != (int)Samplers::Fill) + { + oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1; + oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1; + } + else + { + oneU = 0; + oneV = 0; + } + + // Shade constants + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate()); + shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue()); + shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue()); + desaturate = args->ShadeDesaturate(); + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + } + + // Setup light + uint32_t lightpos = args->Light(); + lightpos += lightpos >> 7; // 255 -> 256 + __m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos); + __m128i shade_fade_lit; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + __m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight); + shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light); + } + else + { + shade_fade_lit = _mm_setzero_si128(); + } + + int count = x1 - x0; + int sseCount = count / 2; + + uint32_t posV = startV; + for (int y = y0; y < y1; y++, posV += stepV) + { + int coreBlock = y / 8; + if (coreBlock % thread->num_cores != thread->core) + continue; + + uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0; + + uint32_t posU = startU; + for (int i = 0; i < sseCount; i++) + { + // Load bgcolor + __m128i bgcolor; + if (BlendT::Mode != (int)BlendModes::Opaque) + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128()); + else + bgcolor = _mm_setzero_si128(); + + // Sample fgcolor + unsigned int ifgcolor[2], ifgshade[2]; + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); + posU += stepU; + + ifgcolor[1] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); + posU += stepU; + + // Shade and blend + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + // Store result + _mm_storel_epi64((__m128i*)dest, outcolor); + dest += 2; + } + + if (sseCount * 2 != count) + { + // Load bgcolor + __m128i bgcolor; + if (BlendT::Mode != (int)BlendModes::Opaque) + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128()); + else + bgcolor = _mm_setzero_si128(); + + // Sample fgcolor + unsigned int ifgcolor[2], ifgshade[2]; + ifgcolor[0] = Sample32(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade32(posU, posV, texPixels, texWidth, texHeight); + ifgcolor[1] = 0; + ifgshade[1] = 0; + posU += stepU; + + // Shade and blend + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + fgcolor = Shade32(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); + __m128i outcolor = Blend32(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); + + // Store result + *dest = _mm_cvtsi128_si32(outcolor); + } + } + } +}; diff --git a/src/polyrenderer/drawers/poly_drawer8.h b/src/polyrenderer/drawers/poly_drawer8.h index 2f5669901e..c6f98bd096 100644 --- a/src/polyrenderer/drawers/poly_drawer8.h +++ b/src/polyrenderer/drawers/poly_drawer8.h @@ -24,6 +24,187 @@ #include "screen_triangle.h" +namespace TriScreenDrawerModes +{ + template + FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation) + { + uint8_t texel; + if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill) + { + return color; + } + else if (SamplerT::Mode == (int)Samplers::Translated) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return translation[texPixels[texelX * texHeight + texelY]]; + } + else + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + texel = texPixels[texelX * texHeight + texelY]; + } + + if (SamplerT::Mode == (int)Samplers::Skycap) + { + int start_fade = 2; // How fast it should fade out + + int alpha_top = clamp(v >> (16 - start_fade), 0, 256); + int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); + int a = MIN(alpha_top, alpha_bottom); + int inv_a = 256 - a; + + if (a == 256) + return texel; + + uint32_t capcolor = GPalette.BaseColors[color].d; + uint32_t texelrgb = GPalette.BaseColors[texel].d; + uint32_t r = RPART(texelrgb); + uint32_t g = GPART(texelrgb); + uint32_t b = BPART(texelrgb); + uint32_t capcolor_red = RPART(capcolor); + uint32_t capcolor_green = GPART(capcolor); + uint32_t capcolor_blue = BPART(capcolor); + r = (r * a + capcolor_red * inv_a + 127) >> 8; + g = (g * a + capcolor_green * inv_a + 127) >> 8; + b = (b * a + capcolor_blue * inv_a + 127) >> 8; + return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; + } + else + { + return texel; + } + } + + template + FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight) + { + if (SamplerT::Mode == (int)Samplers::Shaded) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else if (SamplerT::Mode == (int)Samplers::Stencil) + { + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0; + } + else + { + return 0; + } + } + + template + FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha) + { + lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; + uint8_t shadedfg = colormaps[lightshade + fgcolor]; + + if (BlendT::Mode == (int)BlendModes::Opaque) + { + return shadedfg; + } + else if (BlendT::Mode == (int)BlendModes::Masked) + { + return (fgcolor != 0) ? shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) + { + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7)); + int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7)); + int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7)); + fg_r = MIN(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255); + fg_g = MIN(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255); + fg_b = MIN(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255); + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + return (fgcolor != 0) ? shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + fgshade = (fgshade * srcalpha + 128) >> 8; + uint32_t alpha = fgshade; + uint32_t inv_alpha = 256 - fgshade; + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8; + fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8; + fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8; + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + return (alpha != 0) ? shadedfg : bgcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + fgshade = (fgshade * srcalpha + 128) >> 8; + uint32_t alpha = fgshade; + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + fg_r = MIN(bg_r + ((fg_r * alpha + 127) >> 8), 255); + fg_g = MIN(bg_g + ((fg_g * alpha + 127) >> 8), 255); + fg_b = MIN(bg_b + ((fg_b * alpha + 127) >> 8), 255); + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + + return (alpha != 0) ? shadedfg : bgcolor; + } + else + { + int32_t fg_r = GPalette.BaseColors[shadedfg].r; + int32_t fg_g = GPalette.BaseColors[shadedfg].g; + int32_t fg_b = GPalette.BaseColors[shadedfg].b; + int32_t bg_r = GPalette.BaseColors[bgcolor].r; + int32_t bg_g = GPalette.BaseColors[bgcolor].g; + int32_t bg_b = GPalette.BaseColors[bgcolor].b; + + if (BlendT::Mode == (int)BlendModes::AddClamp) + { + fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255); + fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255); + fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255); + } + else if (BlendT::Mode == (int)BlendModes::SubClamp) + { + fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0); + fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0); + fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0); + } + else if (BlendT::Mode == (int)BlendModes::RevSubClamp) + { + fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0); + fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0); + fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0); + } + + shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; + return (fgcolor != 0) ? shadedfg : bgcolor; + } + } +} + template class TriScreenDrawer8 { @@ -123,9 +304,9 @@ public: { int lightshade = lightpos >> 8; uint8_t bgcolor = dest[x * 8 + ix]; - uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); - dest[x * 8 + ix] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); + uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); + uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight); + dest[x * 8 + ix] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); posU += stepU; posV += stepV; lightpos += lightstep; @@ -185,9 +366,9 @@ public: { int lightshade = lightpos >> 8; uint8_t bgcolor = dest[x]; - uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); - dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); + uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); + uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight); + dest[x] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); } posU += stepU; @@ -236,9 +417,9 @@ public: { int lightshade = lightpos >> 8; uint8_t bgcolor = dest[x]; - uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); - uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); - dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); + uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); + uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight); + dest[x] = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); } posU += stepU; @@ -258,187 +439,6 @@ public: } private: - FORCEINLINE static unsigned int Sample(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation) - { - using namespace TriScreenDrawerModes; - - uint8_t texel; - if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill) - { - return color; - } - else if (SamplerT::Mode == (int)Samplers::Translated) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return translation[texPixels[texelX * texHeight + texelY]]; - } - else - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - texel = texPixels[texelX * texHeight + texelY]; - } - - if (SamplerT::Mode == (int)Samplers::Skycap) - { - int start_fade = 2; // How fast it should fade out - - int alpha_top = clamp(v >> (16 - start_fade), 0, 256); - int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256); - int a = MIN(alpha_top, alpha_bottom); - int inv_a = 256 - a; - - if (a == 256) - return texel; - - uint32_t capcolor = GPalette.BaseColors[color].d; - uint32_t texelrgb = GPalette.BaseColors[texel].d; - uint32_t r = RPART(texelrgb); - uint32_t g = GPART(texelrgb); - uint32_t b = BPART(texelrgb); - uint32_t capcolor_red = RPART(capcolor); - uint32_t capcolor_green = GPART(capcolor); - uint32_t capcolor_blue = BPART(capcolor); - r = (r * a + capcolor_red * inv_a + 127) >> 8; - g = (g * a + capcolor_green * inv_a + 127) >> 8; - b = (b * a + capcolor_blue * inv_a + 127) >> 8; - return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)]; - } - else - { - return texel; - } - } - - FORCEINLINE static unsigned int SampleShade(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight) - { - using namespace TriScreenDrawerModes; - - if (SamplerT::Mode == (int)Samplers::Shaded) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY]; - sampleshadeout += sampleshadeout >> 7; // 255 -> 256 - return sampleshadeout; - } - else if (SamplerT::Mode == (int)Samplers::Stencil) - { - uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; - uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; - return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0; - } - else - { - return 0; - } - } - - FORCEINLINE static uint8_t ShadeAndBlend(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha) - { - using namespace TriScreenDrawerModes; - - lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00; - uint8_t shadedfg = colormaps[lightshade + fgcolor]; - - if (BlendT::Mode == (int)BlendModes::Opaque) - { - return shadedfg; - } - else if (BlendT::Mode == (int)BlendModes::Masked) - { - return (fgcolor != 0) ? shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor) - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7)); - int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7)); - int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7)); - fg_r = MIN(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255); - fg_g = MIN(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255); - fg_b = MIN(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (fgcolor != 0) ? shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::Shaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - uint32_t inv_alpha = 256 - fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8; - fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8; - fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8; - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (alpha != 0) ? shadedfg : bgcolor; - } - else if (BlendT::Mode == (int)BlendModes::AddClampShaded) - { - fgshade = (fgshade * srcalpha + 128) >> 8; - uint32_t alpha = fgshade; - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - fg_r = MIN(bg_r + ((fg_r * alpha + 127) >> 8), 255); - fg_g = MIN(bg_g + ((fg_g * alpha + 127) >> 8), 255); - fg_b = MIN(bg_b + ((fg_b * alpha + 127) >> 8), 255); - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - - return (alpha != 0) ? shadedfg : bgcolor; - } - else - { - int32_t fg_r = GPalette.BaseColors[shadedfg].r; - int32_t fg_g = GPalette.BaseColors[shadedfg].g; - int32_t fg_b = GPalette.BaseColors[shadedfg].b; - int32_t bg_r = GPalette.BaseColors[bgcolor].r; - int32_t bg_g = GPalette.BaseColors[bgcolor].g; - int32_t bg_b = GPalette.BaseColors[bgcolor].b; - - if (BlendT::Mode == (int)BlendModes::AddClamp) - { - fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255); - fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255); - fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255); - } - else if (BlendT::Mode == (int)BlendModes::SubClamp) - { - fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0); - } - else if (BlendT::Mode == (int)BlendModes::RevSubClamp) - { - fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0); - fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0); - fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0); - } - - shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)]; - return (fgcolor != 0) ? shadedfg : bgcolor; - } - } - static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) { float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); @@ -453,3 +453,68 @@ private: return top / bottom; } }; + +template +class RectScreenDrawer8 +{ +public: + static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread) + { + using namespace TriScreenDrawerModes; + + int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth); + int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth); + int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight); + int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight); + + if (x1 <= x0 || y1 <= y0) + return; + + auto colormaps = args->BaseColormap(); + uint32_t srcalpha = args->SrcAlpha(); + uint32_t destalpha = args->DestAlpha(); + + // Setup step variables + float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0()); + float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0()); + uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000); + uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000); + uint32_t stepU = (int32_t)(fstepU * 0x1000000); + uint32_t stepV = (int32_t)(fstepV * 0x1000000); + + // Sampling stuff + uint32_t color = args->Color(); + const uint8_t * RESTRICT translation = args->Translation(); + const uint8_t * RESTRICT texPixels = args->TexturePixels(); + uint32_t texWidth = args->TextureWidth(); + uint32_t texHeight = args->TextureHeight(); + + // Setup light + uint32_t lightshade = args->Light(); + lightshade += lightshade >> 7; // 255 -> 256 + + int count = x1 - x0; + + uint32_t posV = startV; + for (int y = y0; y < y1; y++, posV += stepV) + { + int coreBlock = y / 8; + if (coreBlock % thread->num_cores != thread->core) + continue; + + uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0; + + uint32_t posU = startU; + for (int i = 0; i < count; i++) + { + uint8_t bgcolor = *dest; + uint8_t fgcolor = Sample8(posU, posV, texPixels, texWidth, texHeight, color, translation); + uint32_t fgshade = SampleShade8(posU, posV, texPixels, texWidth, texHeight); + *dest = ShadeAndBlend8(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); + + posU += stepU; + dest++; + } + } + } +}; diff --git a/src/polyrenderer/drawers/poly_triangle.cpp b/src/polyrenderer/drawers/poly_triangle.cpp index 9df271b245..4bf57e12e9 100644 --- a/src/polyrenderer/drawers/poly_triangle.cpp +++ b/src/polyrenderer/drawers/poly_triangle.cpp @@ -449,3 +449,23 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) PolyTriangleDrawer::draw_arrays(args, &thread_data); } + +///////////////////////////////////////////////////////////////////////////// + +void DrawRectCommand::Execute(DrawerThread *thread) +{ + WorkerThreadData thread_data; + thread_data.core = thread->core; + thread_data.num_cores = thread->num_cores; + + auto renderTarget = PolyRenderer::Instance()->RenderTarget; + const void *destOrg = renderTarget->GetBuffer(); + int destWidth = renderTarget->GetWidth(); + int destHeight = renderTarget->GetHeight(); + int destPitch = renderTarget->GetPitch(); + int blendmode = (int)args.BlendMode(); + if (renderTarget->IsBgra()) + ScreenTriangle::RectDrawers32[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data); + else + ScreenTriangle::RectDrawers8[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data); +} diff --git a/src/polyrenderer/drawers/poly_triangle.h b/src/polyrenderer/drawers/poly_triangle.h index d3019fc36a..7cd6dbe761 100644 --- a/src/polyrenderer/drawers/poly_triangle.h +++ b/src/polyrenderer/drawers/poly_triangle.h @@ -71,3 +71,15 @@ public: private: PolyDrawArgs args; }; + +class DrawRectCommand : public DrawerCommand +{ +public: + DrawRectCommand(const RectDrawArgs &args) : args(args) { } + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "DrawRect"; } + +private: + RectDrawArgs args; +}; diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index d1fe49513f..d61bd79a22 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -922,7 +922,7 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThrea } } -std::vector ScreenTriangle::TriDrawers8 = +void(*ScreenTriangle::TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *) = { &TriScreenDrawer8::Execute, // TextureOpaque &TriScreenDrawer8::Execute, // TextureMasked @@ -950,11 +950,14 @@ std::vector ScreenTria #ifdef NO_SSE -std::vector ScreenTriangle::TriDrawers32; +void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) = +{ + nullptr +}; #else -std::vector ScreenTriangle::TriDrawers32 = +void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) = { &TriScreenDrawer32::Execute, // TextureOpaque &TriScreenDrawer32::Execute, // TextureMasked @@ -981,3 +984,66 @@ std::vector ScreenTria }; #endif + +void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) = +{ + &RectScreenDrawer8::Execute, // TextureOpaque + &RectScreenDrawer8::Execute, // TextureMasked + &RectScreenDrawer8::Execute, // TextureAdd + &RectScreenDrawer8::Execute, // TextureSub + &RectScreenDrawer8::Execute, // TextureRevSub + &RectScreenDrawer8::Execute, // TextureAddSrcColor + &RectScreenDrawer8::Execute, // TranslatedOpaque + &RectScreenDrawer8::Execute, // TranslatedMasked + &RectScreenDrawer8::Execute, // TranslatedAdd + &RectScreenDrawer8::Execute, // TranslatedSub + &RectScreenDrawer8::Execute, // TranslatedRevSub + &RectScreenDrawer8::Execute, // TranslatedAddSrcColor + &RectScreenDrawer8::Execute, // Shaded + &RectScreenDrawer8::Execute, // AddShaded + &RectScreenDrawer8::Execute, // Stencil + &RectScreenDrawer8::Execute, // AddStencil + &RectScreenDrawer8::Execute, // FillOpaque + &RectScreenDrawer8::Execute, // FillAdd + &RectScreenDrawer8::Execute, // FillSub + &RectScreenDrawer8::Execute, // FillRevSub + &RectScreenDrawer8::Execute, // FillAddSrcColor + &RectScreenDrawer8::Execute // Skycap +}; + +#ifdef NO_SSE + +void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) = +{ + nullptr +}; + +#else + +void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) = +{ + &RectScreenDrawer32::Execute, // TextureOpaque + &RectScreenDrawer32::Execute, // TextureMasked + &RectScreenDrawer32::Execute, // TextureAdd + &RectScreenDrawer32::Execute, // TextureSub + &RectScreenDrawer32::Execute, // TextureRevSub + &RectScreenDrawer32::Execute, // TextureAddSrcColor + &RectScreenDrawer32::Execute, // TranslatedOpaque + &RectScreenDrawer32::Execute, // TranslatedMasked + &RectScreenDrawer32::Execute, // TranslatedAdd + &RectScreenDrawer32::Execute, // TranslatedSub + &RectScreenDrawer32::Execute, // TranslatedRevSub + &RectScreenDrawer32::Execute, // TranslatedAddSrcColor + &RectScreenDrawer32::Execute, // Shaded + &RectScreenDrawer32::Execute, // AddShaded + &RectScreenDrawer32::Execute, // Stencil + &RectScreenDrawer32::Execute, // AddStencil + &RectScreenDrawer32::Execute, // FillOpaque + &RectScreenDrawer32::Execute, // FillAdd + &RectScreenDrawer32::Execute, // FillSub + &RectScreenDrawer32::Execute, // FillRevSub + &RectScreenDrawer32::Execute, // FillAddSrcColor + &RectScreenDrawer32::Execute // Skycap +}; + +#endif diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 090a398ba0..ab13649904 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -85,6 +85,8 @@ struct TriDrawTriangleArgs const PolyDrawArgs *uniforms; }; +class RectDrawArgs; + enum class TriBlendMode { TextureOpaque, @@ -119,8 +121,10 @@ public: static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); - static std::vector TriDrawers8; - static std::vector TriDrawers32; + static void(*TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *); + static void(*TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *); + static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *); + static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *); }; struct ScreenTriangleStepVariables diff --git a/src/polyrenderer/scene/poly_playersprite.cpp b/src/polyrenderer/scene/poly_playersprite.cpp index 9920d2dc94..360af43bc2 100644 --- a/src/polyrenderer/scene/poly_playersprite.cpp +++ b/src/polyrenderer/scene/poly_playersprite.cpp @@ -576,52 +576,28 @@ fixed_t RenderPolyPlayerSprites::LightLevelToShade(int lightlevel, bool foggy) void PolyNoAccelPlayerSprite::Render() { -#if 0 if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f)) { // scaled to 0; can't see return; } - SpriteDrawerArgs drawerargs; - drawerargs.SetLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); - - FDynamicColormap *basecolormap = static_cast(Light.BaseColormap); - - bool visible = drawerargs.SetStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS); - if (!visible) - return; - - double spryscale = yscale; - bool sprflipvert = false; - fixed_t iscale = FLOAT2FIXED(1 / yscale); + RectDrawArgs args; + args.SetStyle(RenderStyle, Alpha, FillColor, Translation, pic, false); + args.SetLight(Light.BaseColormap, 255 - (Light.ColormapNum << 3)); double centerY = viewheight / 2; - - double sprtopscreen; + double y1, y2; if (renderflags & RF_YFLIP) { - sprflipvert = true; - spryscale = -spryscale; - iscale = -iscale; - sprtopscreen = centerY + (texturemid - pic->GetHeight()) * spryscale; + y1 = centerY + (texturemid - pic->GetHeight()) * (-yscale); + y2 = y1 + pic->GetHeight() * (-yscale); } else { - sprflipvert = false; - sprtopscreen = centerY - texturemid * spryscale; + y1 = centerY - texturemid * yscale; + y2 = y1 + pic->GetHeight() * yscale; } - - // clip to screen bounds - short *mfloorclip = screenheightarray; - short *mceilingclip = zeroarray; - - fixed_t frac = startfrac; - for (int x = x1; x < x2; x++) - { - drawerargs.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false); - frac += xiscale; - } -#endif + args.Draw(x1, x2, y1, y2, 0.0f, 1.0f, 0.0f, 1.0f); } /////////////////////////////////////////////////////////////////////////////