From b2e61412da962302ea766dbbac6ba5ddf70d40c8 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Wed, 22 Mar 2017 09:11:21 +0100 Subject: [PATCH 1/2] - add missing sprite blends to the soft poly drawer --- src/polyrenderer/drawers/poly_drawer32_sse2.h | 64 ++++++++++++++++--- src/polyrenderer/drawers/screen_triangle.cpp | 48 +++++++------- src/polyrenderer/drawers/screen_triangle.h | 4 +- 3 files changed, 83 insertions(+), 33 deletions(-) diff --git a/src/polyrenderer/drawers/poly_drawer32_sse2.h b/src/polyrenderer/drawers/poly_drawer32_sse2.h index b15b5e47b..c86d3bc93 100644 --- a/src/polyrenderer/drawers/poly_drawer32_sse2.h +++ b/src/polyrenderer/drawers/poly_drawer32_sse2.h @@ -82,7 +82,6 @@ public: auto flags = args->uniforms->flags; bool is_fixed_light = (flags & TriUniforms::fixed_light) == TriUniforms::fixed_light; uint32_t lightmask = is_fixed_light ? 0 : 0xffffffff; - auto colormaps = args->colormaps; uint32_t srcalpha = args->uniforms->srcalpha; uint32_t destalpha = args->uniforms->destalpha; @@ -202,12 +201,14 @@ public: bgcolor = _mm_setzero_si128(); // Sample fgcolor - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; ifgcolor[0] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; ifgcolor[1] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -232,7 +233,7 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor); @@ -303,12 +304,14 @@ public: bgcolor = _mm_setzero_si128(); // Sample fgcolor - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; ifgcolor[0] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; ifgcolor[1] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -333,7 +336,7 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)desttmp, outcolor); @@ -393,12 +396,14 @@ public: bgcolor = _mm_setzero_si128(); // Sample fgcolor - unsigned int ifgcolor[2]; + unsigned int ifgcolor[2], ifgshade[2]; ifgcolor[0] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[0] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; ifgcolor[1] = Sample(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight, oneU, oneV, color, translation); + ifgshade[1] = SampleShade(varyingPos[0], varyingPos[1], texPixels, texWidth, texHeight); for (int j = 0; j < TriVertex::NumVarying; j++) varyingPos[j] += varyingStep[j]; @@ -423,7 +428,7 @@ public: // Shade and blend __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); - __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); // Store result _mm_storel_epi64((__m128i*)desttmp, outcolor); @@ -525,6 +530,25 @@ private: } } + FORCEINLINE static unsigned int VECTORCALL SampleShade(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight) + { + using namespace TriScreenDrawerModes; + + if (SamplerT::Mode == (int)Samplers::Shaded) + { + const uint8_t *texpal = (const uint8_t *)texPixels; + uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16; + uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16; + unsigned int sampleshadeout = texpal[texelX * texHeight + texelY]; + sampleshadeout += sampleshadeout >> 7; // 255 -> 256 + return sampleshadeout; + } + else + { + return 0; + } + } + template FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light) { @@ -556,7 +580,7 @@ private: return fgcolor; } - FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) + FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha) { using namespace TriScreenDrawerModes; @@ -582,6 +606,30 @@ private: outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); return outcolor; } + else if (BlendT::Mode == (int)BlendModes::Shaded) + { + ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8; + ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8; + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else if (BlendT::Mode == (int)BlendModes::AddClampShaded) + { + __m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0); + + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8); + __m128i outcolor = _mm_add_epi16(fgcolor, bgcolor); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } else { uint32_t alpha0 = APART(ifgcolor0); diff --git a/src/polyrenderer/drawers/screen_triangle.cpp b/src/polyrenderer/drawers/screen_triangle.cpp index b6a737a01..ccb5ec13b 100644 --- a/src/polyrenderer/drawers/screen_triangle.cpp +++ b/src/polyrenderer/drawers/screen_triangle.cpp @@ -713,13 +713,13 @@ std::vector ScreenTria &TriScreenDrawer8::Execute, // "Add", "add", false &TriScreenDrawer8::Execute, // "Sub", "sub", false &TriScreenDrawer8::Execute, // "RevSub", "revsub", false - &TriScreenDrawer8::Execute, // "Stencil", "stencil", false - &TriScreenDrawer8::Execute, // "Shaded", "shaded", false + &TriScreenDrawer8::Execute, // "Stencil", "stencil", false + &TriScreenDrawer8::Execute, // "Shaded", "shaded", false &TriScreenDrawer8::Execute, // "TranslateCopy", "opaque", true - &TriScreenDrawer8::Execute, // "TranslateAlphaBlend", "masked", true - &TriScreenDrawer8::Execute, // "TranslateAdd", "add", true - &TriScreenDrawer8::Execute, // "TranslateSub", "sub", true - &TriScreenDrawer8::Execute, // "TranslateRevSub", "revsub", true + &TriScreenDrawer8::Execute, // "TranslateAlphaBlend", "masked", true + &TriScreenDrawer8::Execute, // "TranslateAdd", "add", true + &TriScreenDrawer8::Execute, // "TranslateSub", "sub", true + &TriScreenDrawer8::Execute, // "TranslateRevSub", "revsub", true &TriScreenDrawer8::Execute, // "AddSrcColorOneMinusSrcColor", "addsrccolor", false &TriScreenDrawer8::Execute // "Skycap", "skycap", false }; @@ -732,13 +732,13 @@ std::vector ScreenTria &TriScreenDrawer8::Execute, // "Add", "add", false &TriScreenDrawer8::Execute, // "Sub", "sub", false &TriScreenDrawer8::Execute, // "RevSub", "revsub", false - &TriScreenDrawer8::Execute, // "Stencil", "stencil", false - &TriScreenDrawer8::Execute, // "Shaded", "shaded", false + &TriScreenDrawer8::Execute, // "Stencil", "stencil", false + &TriScreenDrawer8::Execute, // "Shaded", "shaded", false &TriScreenDrawer8::Execute, // "TranslateCopy", "opaque", true - &TriScreenDrawer8::Execute, // "TranslateAlphaBlend", "masked", true - &TriScreenDrawer8::Execute, // "TranslateAdd", "add", true - &TriScreenDrawer8::Execute, // "TranslateSub", "sub", true - &TriScreenDrawer8::Execute, // "TranslateRevSub", "revsub", true + &TriScreenDrawer8::Execute, // "TranslateAlphaBlend", "masked", true + &TriScreenDrawer8::Execute, // "TranslateAdd", "add", true + &TriScreenDrawer8::Execute, // "TranslateSub", "sub", true + &TriScreenDrawer8::Execute, // "TranslateRevSub", "revsub", true &TriScreenDrawer8::Execute, // "AddSrcColorOneMinusSrcColor", "addsrccolor", false &TriScreenDrawer8::Execute // "Skycap", "skycap", false }; @@ -758,13 +758,13 @@ std::vector ScreenTria &TriScreenDrawer32::Execute, // "Add", "add", false &TriScreenDrawer32::Execute, // "Sub", "sub", false &TriScreenDrawer32::Execute, // "RevSub", "revsub", false - &TriScreenDrawer32::Execute, // "Stencil", "stencil", false - &TriScreenDrawer32::Execute, // "Shaded", "shaded", false + &TriScreenDrawer32::Execute, // "Stencil", "stencil", false + &TriScreenDrawer32::Execute, // "Shaded", "shaded", false &TriScreenDrawer32::Execute, // "TranslateCopy", "opaque", true - &TriScreenDrawer32::Execute, // "TranslateAlphaBlend", "masked", true - &TriScreenDrawer32::Execute, // "TranslateAdd", "add", true - &TriScreenDrawer32::Execute, // "TranslateSub", "sub", true - &TriScreenDrawer32::Execute, // "TranslateRevSub", "revsub", true + &TriScreenDrawer32::Execute, // "TranslateAlphaBlend", "masked", true + &TriScreenDrawer32::Execute, // "TranslateAdd", "add", true + &TriScreenDrawer32::Execute, // "TranslateSub", "sub", true + &TriScreenDrawer32::Execute, // "TranslateRevSub", "revsub", true &TriScreenDrawer32::Execute, // "AddSrcColorOneMinusSrcColor", "addsrccolor", false &TriScreenDrawer32::Execute // "Skycap", "skycap", false }; @@ -777,13 +777,13 @@ std::vector ScreenTria &TriScreenDrawer32::Execute, // "Add", "add", false &TriScreenDrawer32::Execute, // "Sub", "sub", false &TriScreenDrawer32::Execute, // "RevSub", "revsub", false - &TriScreenDrawer32::Execute, // "Stencil", "stencil", false - &TriScreenDrawer32::Execute, // "Shaded", "shaded", false + &TriScreenDrawer32::Execute, // "Stencil", "stencil", false + &TriScreenDrawer32::Execute, // "Shaded", "shaded", false &TriScreenDrawer32::Execute, // "TranslateCopy", "opaque", true - &TriScreenDrawer32::Execute, // "TranslateAlphaBlend", "masked", true - &TriScreenDrawer32::Execute, // "TranslateAdd", "add", true - &TriScreenDrawer32::Execute, // "TranslateSub", "sub", true - &TriScreenDrawer32::Execute, // "TranslateRevSub", "revsub", true + &TriScreenDrawer32::Execute, // "TranslateAlphaBlend", "masked", true + &TriScreenDrawer32::Execute, // "TranslateAdd", "add", true + &TriScreenDrawer32::Execute, // "TranslateSub", "sub", true + &TriScreenDrawer32::Execute, // "TranslateRevSub", "revsub", true &TriScreenDrawer32::Execute, // "AddSrcColorOneMinusSrcColor", "addsrccolor", false &TriScreenDrawer32::Execute // "Skycap", "skycap", false }; diff --git a/src/polyrenderer/drawers/screen_triangle.h b/src/polyrenderer/drawers/screen_triangle.h index 3e539f2c6..470029c84 100644 --- a/src/polyrenderer/drawers/screen_triangle.h +++ b/src/polyrenderer/drawers/screen_triangle.h @@ -163,13 +163,15 @@ struct ScreenTriangleStepVariables namespace TriScreenDrawerModes { - enum class BlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp, AddSrcColorOneMinusSrcColor }; + enum class BlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp, AddSrcColorOneMinusSrcColor, Shaded, AddClampShaded }; struct OpaqueBlend { static const int Mode = (int)BlendModes::Opaque; }; struct MaskedBlend { static const int Mode = (int)BlendModes::Masked; }; struct AddClampBlend { static const int Mode = (int)BlendModes::AddClamp; }; struct SubClampBlend { static const int Mode = (int)BlendModes::SubClamp; }; struct RevSubClampBlend { static const int Mode = (int)BlendModes::RevSubClamp; }; struct AddSrcColorBlend { static const int Mode = (int)BlendModes::AddSrcColorOneMinusSrcColor; }; + struct ShadedBlend { static const int Mode = (int)BlendModes::Shaded; }; + struct AddClampShadedBlend { static const int Mode = (int)BlendModes::AddClampShaded; }; enum class FilterModes { Nearest, Linear }; struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; From 09c3060728079c6ec12128f3f0821379888ebd23 Mon Sep 17 00:00:00 2001 From: Rachael Alexanderson Date: Wed, 22 Mar 2017 05:51:12 -0400 Subject: [PATCH 2/2] - fixed: skybug on ARM was caused by undefined conversion from float to unsigned int. adding intermediary signed int cast fixes this problem. --- src/swrenderer/plane/r_skyplane.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/swrenderer/plane/r_skyplane.cpp b/src/swrenderer/plane/r_skyplane.cpp index c75411e7f..e76016a3e 100644 --- a/src/swrenderer/plane/r_skyplane.cpp +++ b/src/swrenderer/plane/r_skyplane.cpp @@ -180,8 +180,8 @@ namespace swrenderer double v = (texturemid + uv_stepd * (y1 - viewport->CenterY + 0.5)) / height; double v_step = uv_stepd / height; - uint32_t uv_pos = (uint32_t)(v * 0x01000000); - uint32_t uv_step = (uint32_t)(v_step * 0x01000000); + uint32_t uv_pos = (uint32_t)(int32_t)(v * 0x01000000); + uint32_t uv_step = (uint32_t)(int32_t)(v_step * 0x01000000); int x = start_x; if (renderportal->MirrorFlags & RF_XFLIP)