diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 8d7a807f1..01e517ebd 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -39,7 +39,7 @@ #include "gl/data/gl_matrix.h" #include "swrenderer/viewport/r_viewport.h" #include "swrenderer/scene/r_light.h" -#include "r_draw_wall32.h" +#include "r_draw_wall32_sse2.h" #include "r_draw_sprite32.h" #include "r_draw_span32_sse2.h" #include "r_draw_sky32.h" diff --git a/src/swrenderer/drawers/r_draw_span32_sse2.h b/src/swrenderer/drawers/r_draw_span32_sse2.h index 35fda5f92..34daaeb97 100644 --- a/src/swrenderer/drawers/r_draw_span32_sse2.h +++ b/src/swrenderer/drawers/r_draw_span32_sse2.h @@ -215,7 +215,6 @@ namespace swrenderer bgcolor = _mm_setzero_si128(); } - // Sample unsigned int ifgcolor[2]; ifgcolor[0] = Sample(texdata.xbits, texdata.ybits, texdata.xstep, texdata.ystep, texdata.xfrac, texdata.yfrac, texdata.yshift, texdata.xshift, texdata.xmask, texdata.source); texdata.xfrac += texdata.xstep; diff --git a/src/swrenderer/drawers/r_draw_wall32_sse2.h b/src/swrenderer/drawers/r_draw_wall32_sse2.h new file mode 100644 index 000000000..03be70e75 --- /dev/null +++ b/src/swrenderer/drawers/r_draw_wall32_sse2.h @@ -0,0 +1,406 @@ +/* +** Drawer commands for walls +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. +** +*/ + +#pragma once + +#include "swrenderer/drawers/r_draw_rgba.h" +#include "swrenderer/viewport/r_walldrawer.h" + +namespace swrenderer +{ + namespace DrawWall32TModes + { + enum class WallBlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp }; + struct OpaqueWall { static const int Mode = (int)WallBlendModes::Opaque; }; + struct MaskedWall { static const int Mode = (int)WallBlendModes::Masked; }; + struct AddClampWall { static const int Mode = (int)WallBlendModes::AddClamp; }; + struct SubClampWall { static const int Mode = (int)WallBlendModes::SubClamp; }; + struct RevSubClampWall { static const int Mode = (int)WallBlendModes::RevSubClamp; }; + + enum class FilterModes { Nearest, Linear }; + struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; }; + struct LinearFilter { static const int Mode = (int)FilterModes::Linear; }; + + enum class ShadeMode { Simple, Advanced }; + struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; }; + struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; }; + } + + template + class DrawWall32T : public DrawerCommand + { + protected: + WallDrawerArgs args; + + public: + DrawWall32T(const WallDrawerArgs &drawerargs) : args(drawerargs) { } + + void Execute(DrawerThread *thread) override + { + using namespace DrawWall32TModes; + + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + bool is_nearest_filter = (source2 == nullptr); + auto shade_constants = args.ColormapConstants(); + if (shade_constants.simple_shade) + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + else + { + if (is_nearest_filter) + Loop(thread, shade_constants); + else + Loop(thread, shade_constants); + } + } + + template + void Loop(DrawerThread *thread, ShadeConstants shade_constants) + { + using namespace DrawWall32TModes; + + const uint32_t *source = (const uint32_t*)args.TexturePixels(); + const uint32_t *source2 = (const uint32_t*)args.TexturePixels2(); + int textureheight = args.TextureHeight(); + uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; + + // Shade constants + int light = 256 - (args.Light() >> (FRACBITS - 8)); + __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); + __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); + + __m128i inv_desaturate, shade_fade, shade_light; + int desaturate; + if (ShadeModeT::Mode == (int)ShadeMode::Advanced) + { + inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); + shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); + desaturate = shade_constants.desaturate; + } + else + { + inv_desaturate = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_fade = _mm_setzero_si128(); + shade_light = _mm_setzero_si128(); + desaturate = 0; + } + + int count = args.Count(); + int pitch = RenderViewport::Instance()->RenderTarget->GetPitch(); + uint32_t fracstep = args.TextureVStep(); + uint32_t frac = args.TextureVPos(); + uint32_t texturefracx = args.TextureUPos(); + uint32_t *dest = (uint32_t*)args.Dest(); + int dest_y = args.DestY(); + + auto lights = args.dc_lights; + auto num_lights = args.dc_num_lights; + float vpz = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y); + float stepvpz = args.dc_viewpos_step.Z * thread->num_cores; + __m128 viewpos_z = _mm_setr_ps(vpz, vpz + stepvpz, 0.0f, 0.0f); + __m128 step_viewpos_z = _mm_set1_ps(stepvpz * 2.0f); + + count = thread->count_for_thread(dest_y, count); + if (count <= 0) return; + frac += thread->skipped_by_thread(dest_y) * fracstep; + dest = thread->dest_for_thread(dest_y, pitch, dest); + fracstep *= thread->num_cores; + pitch *= thread->num_cores; + + if (FilterModeT::Mode == (int)FilterModes::Linear) + { + frac -= one / 2; + } + + uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8); + uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8); + + int ssecount = count / 2; + for (int index = 0; index < ssecount; index++) + { + int offset = index * pitch * 2; + uint32_t desttmp[2]; + desttmp[0] = dest[offset]; + desttmp[1] = dest[offset + pitch]; + + __m128i bgcolor; + if (BlendT::Mode != (int)WallBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)desttmp), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(frac, source, source2, textureheight, one, texturefracx); + frac += fracstep; + + ifgcolor[1] = Sample(frac, source, source2, textureheight, one, texturefracx); + frac += fracstep; + + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_z); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + + _mm_storel_epi64((__m128i*)desttmp, outcolor); + dest[offset] = desttmp[0]; + dest[offset + pitch] = desttmp[1]; + viewpos_z = _mm_add_ps(viewpos_z, step_viewpos_z); + } + + if (ssecount * 2 != count) + { + int index = ssecount * 2; + int offset = index * pitch; + + __m128i bgcolor; + if (BlendT::Mode != (int)WallBlendModes::Opaque) + { + bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(dest[offset]), _mm_setzero_si128()); + } + else + { + bgcolor = _mm_setzero_si128(); + } + + unsigned int ifgcolor[2]; + ifgcolor[0] = Sample(frac, source, source2, textureheight, one, texturefracx); + ifgcolor[1] = 0; + __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); + + fgcolor = Shade(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_z); + __m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], srcalpha, destalpha); + + dest[offset] = _mm_cvtsi128_si32(outcolor); + } + } + + template + unsigned int Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx) + { + using namespace DrawWall32TModes; + + if (FilterModeT::Mode == (int)FilterModes::Nearest) + { + int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index]; + } + else + { + unsigned int frac_y0 = (frac >> FRACBITS) * textureheight; + unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight; + unsigned int y0 = frac_y0 >> FRACBITS; + unsigned int y1 = frac_y1 >> FRACBITS; + + unsigned int p00 = source[y0]; + unsigned int p01 = source[y1]; + unsigned int p10 = source2[y0]; + unsigned int p11 = source2[y1]; + + unsigned int inv_b = texturefracx; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; + unsigned int b = 16 - inv_b; + + unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8; + unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8; + + return (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue; + } + } + + template + __m128i Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + { + using namespace DrawWall32TModes; + + __m128i material = fgcolor; + if (ShadeModeT::Mode == (int)ShadeMode::Simple) + { + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + } + else + { + int blue0 = BPART(ifgcolor0); + int green0 = GPART(ifgcolor0); + int red0 = RPART(ifgcolor0); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + + int blue1 = BPART(ifgcolor1); + int green1 = GPART(ifgcolor1); + int red1 = RPART(ifgcolor1); + int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate; + + __m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0); + + fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); + fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + } + + return AddLights(material, fgcolor, lights, num_lights, viewpos_z); + } + + __m128i AddLights(__m128i material, __m128i fgcolor, const DrawerLight *lights, int num_lights, __m128 viewpos_z) + { + using namespace DrawWall32TModes; + + __m128i lit = _mm_setzero_si128(); + + for (int i = 0; i != num_lights; i++) + { + __m128 light_x = _mm_set1_ps(lights[i].x); + __m128 light_y = _mm_set1_ps(lights[i].y); + __m128 light_z = _mm_set1_ps(lights[i].z); + __m128 light_radius = _mm_set1_ps(lights[i].radius); + __m128 m256 = _mm_set1_ps(256.0f); + + // L = light-pos + // dist = sqrt(dot(L, L)) + // distance_attenuation = 1 - MIN(dist * (1/radius), 1) + __m128 Lxy2 = light_x; // L.x*L.x + L.y*L.y + __m128 Lz = _mm_sub_ps(light_z, viewpos_z); + __m128 dist2 = _mm_add_ps(Lxy2, _mm_mul_ps(Lz, Lz)); + __m128 rcp_dist = _mm_rsqrt_ps(dist2); + __m128 dist = _mm_mul_ps(dist2, rcp_dist); + __m128 distance_attenuation = _mm_sub_ps(m256, _mm_min_ps(_mm_mul_ps(dist, light_radius), m256)); + + // The simple light type + __m128 simple_attenuation = distance_attenuation; + + // The point light type + // diffuse = dot(N,L) * attenuation + __m128 point_attenuation = _mm_mul_ps(_mm_mul_ps(light_y, rcp_dist), distance_attenuation); + + __m128 is_attenuated = _mm_cmpeq_ps(light_y, _mm_setzero_ps()); + __m128i attenuation = _mm_cvtps_epi32(_mm_or_ps(_mm_and_ps(is_attenuated, simple_attenuation), _mm_andnot_ps(is_attenuated, point_attenuation))); + attenuation = _mm_packs_epi32(_mm_shuffle_epi32(attenuation, _MM_SHUFFLE(0, 0, 0, 0)), _mm_shuffle_epi32(attenuation, _MM_SHUFFLE(1, 1, 1, 1))); + + __m128i light_color = _mm_cvtsi32_si128(lights[i].color); + light_color = _mm_unpacklo_epi8(light_color, _mm_setzero_si128()); + light_color = _mm_shuffle_epi32(light_color, _MM_SHUFFLE(1, 0, 1, 0)); + + lit = _mm_add_epi16(lit, _mm_srli_epi16(_mm_mullo_epi16(light_color, attenuation), 8)); + } + + fgcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(material, lit), 8)); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(255)); + return fgcolor; + } + + __m128i Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, uint32_t srcalpha, uint32_t destalpha) + { + using namespace DrawWall32TModes; + + if (BlendT::Mode == (int)WallBlendModes::Opaque) + { + __m128i outcolor = fgcolor; + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + return outcolor; + } + else if (BlendT::Mode == (int)WallBlendModes::Masked) + { + __m128i alpha = _mm_shufflelo_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_shufflehi_epi16(fgcolor, _MM_SHUFFLE(3, 3, 3, 3)); + alpha = _mm_add_epi16(alpha, _mm_srli_epi16(alpha, 7)); // 255 -> 256 + __m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha); + + fgcolor = _mm_mullo_epi16(fgcolor, alpha); + bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha); + __m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + else + { + uint32_t alpha0 = APART(ifgcolor0); + uint32_t alpha1 = APART(ifgcolor1); + alpha0 += alpha0 >> 7; // 255->256 + alpha1 += alpha1 >> 7; // 255->256 + uint32_t inv_alpha0 = 256 - alpha0; + uint32_t inv_alpha1 = 256 - alpha1; + + uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8; + uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8; + uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8; + uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8; + + __m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0); + __m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0); + + fgcolor = _mm_mullo_epi16(fgcolor, fgalpha); + bgcolor = _mm_mullo_epi16(bgcolor, bgalpha); + + __m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128()); + __m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128()); + __m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128()); + + __m128i out_lo, out_hi; + if (BlendT::Mode == (int)WallBlendModes::AddClamp) + { + out_lo = _mm_add_epi32(fg_lo, bg_lo); + out_hi = _mm_add_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)WallBlendModes::SubClamp) + { + out_lo = _mm_sub_epi32(fg_lo, bg_lo); + out_hi = _mm_sub_epi32(fg_hi, bg_hi); + } + else if (BlendT::Mode == (int)WallBlendModes::RevSubClamp) + { + out_lo = _mm_sub_epi32(bg_lo, fg_lo); + out_hi = _mm_sub_epi32(bg_hi, fg_hi); + } + + out_lo = _mm_srai_epi32(out_lo, 8); + out_hi = _mm_srai_epi32(out_hi, 8); + __m128i outcolor = _mm_packs_epi32(out_lo, out_hi); + outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128()); + outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000)); + return outcolor; + } + } + + FString DebugInfo() override { return "DrawWall32T"; } + }; + + typedef DrawWall32T DrawWall32Command; + typedef DrawWall32T DrawWallMasked32Command; + typedef DrawWall32T DrawWallAddClamp32Command; + typedef DrawWall32T DrawWallSubClamp32Command; + typedef DrawWall32T DrawWallRevSubClamp32Command; +}