From 5644b2c0fdbbc175faf26199b585aaaa6c9066e1 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Fri, 17 Mar 2017 06:58:36 +0100
Subject: [PATCH] Create non-sse version of the wall drawers

Add r_draw_wall32.h, a scalar implementation of the 32-bit wall column
drawers (opaque, masked, add clamp, sub clamp, reverse sub clamp), and
replace the drawervectors emulation classes in r_draw_rgba.h with a plain
per-channel BgraColor helper. The sky drawers are converted to BgraColor.

Review fixes folded into this revision of the patch:

 * r_draw_sky32.h: both double-sky fades kept the statement
   "c = (c * alpha + solid_top_fill * inv_alpha) >> 8". With BgraColor the
   operands convert implicitly to packed 32-bit pixels, so the arithmetic
   ran on whole pixels instead of on channels. They now blend r, g and b
   separately, exactly like the single-sky drawer.
 * r_draw_wall32.h: AddLights() called _mm_rsqrt_ss(), which makes the
   "non-sse" drawer depend on SSE. It now uses 1.0f / std::sqrt().
 * r_draw_wall32.h: inv_light was the constant 256, so the fade color was
   always added at full weight and a channel could exceed 255 before being
   packed. It is now 256 - light, so both weights sum to 256.
 * Template parameter and argument lists that were missing from the patch
   text (template<...>, Loop<...>, Sample<...>, Shade<...>, MIN<uint32_t>,
   DrawWall32T<...>) are spelled out.

---
 src/swrenderer/drawers/r_draw_rgba.h   |  69 +----
 src/swrenderer/drawers/r_draw_sky32.h  |  64 ++--
 src/swrenderer/drawers/r_draw_wall32.h | 348 +++++++++++++++++++++++++
 3 files changed, 389 insertions(+), 92 deletions(-)
 create mode 100644 src/swrenderer/drawers/r_draw_wall32.h

diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h
index b7bb610c4d..0ecf21235b 100644
--- a/src/swrenderer/drawers/r_draw_rgba.h
+++ b/src/swrenderer/drawers/r_draw_rgba.h
@@ -373,67 +373,12 @@ namespace swrenderer
 		}
 	};
 
-	/////////////////////////////////////////////////////////////////////////////
-	// Vector classes for non-SSE drawers that behave like their SSE counterparts
-
-	namespace drawervectors
+	struct BgraColor
 	{
-		struct vec4ui
-		{
-			vec4ui() {}
-			vec4ui(uint32_t v) : a(v), r(v), g(v), b(v) { }
-			vec4ui(uint32_t a, uint32_t r, uint32_t g, uint32_t b) : a(a), r(r), g(g), b(b) { }
-			uint32_t a, r, g, b;
-		};
-
-		struct vec8us
-		{
-			vec8us() {}
-			vec8us(uint16_t v) : a0(v), r0(v), g0(v), b0(v) { }
-			vec8us(uint16_t a0, uint16_t r0, uint16_t g0, uint16_t b0, uint16_t a1, uint16_t r1, uint16_t g1, uint16_t b1) : a0(a0), r0(r0), g0(g0), b0(b0), a1(a1), r1(r1), g1(g1), b1(b1) { }
-			uint16_t a0, r0, g0, b0, a1, r1, g1, b1;
-		};
-
-		inline vec8us unpack(uint32_t lo, uint32_t hi) { return vec8us(APART(lo), RPART(lo), GPART(lo), BPART(lo), APART(hi), RPART(hi), GPART(hi), BPART(hi)); }
-		inline vec4ui unpacklo(vec8us v) { return vec4ui(v.a0, v.r0, v.g0, v.b0); }
-		inline vec4ui unpackhi(vec8us v) { return vec4ui(v.a1, v.r1, v.g1, v.b1); }
-
-		inline vec8us pack(vec4ui lo, vec4ui hi)
-		{
-			return vec8us(lo.a, lo.r, lo.g, lo.b, hi.a, hi.r, hi.g, hi.b);
-		}
-		inline uint32_t packlo(vec8us v)
-		{
-			return MAKEARGB((uint32_t)clamp(v.a0, 0, 255), (uint32_t)clamp(v.r0, 0, 255), (uint32_t)clamp(v.g0, 0, 255), (uint32_t)clamp(v.b0, 0, 255));
-		}
-		inline uint32_t packhi(vec8us v)
-		{
-			return MAKEARGB((uint32_t)clamp(v.a1, 0, 255), (uint32_t)clamp(v.r1, 0, 255), (uint32_t)clamp(v.g1, 0, 255), (uint32_t)clamp(v.b1, 0, 255));
-		}
-
-		inline vec8us operator+(vec8us a, vec8us b)
-		{
-			return vec8us(a.a0 + b.a0, a.r0 + b.r0, a.g0 + b.g0, a.b0 + b.b0, a.a1 + b.a1, a.r1 + b.r1, a.g1 + b.g1, a.b1 + b.b1);
-		}
-
-		inline vec8us operator-(vec8us a, vec8us b)
-		{
-			return vec8us(a.a0 - b.a0, a.r0 - b.r0, a.g0 - b.g0, a.b0 - b.b0, a.a1 - b.a1, a.r1 - b.r1, a.g1 - b.g1, a.b1 - b.b1);
-		}
-
-		inline vec8us operator*(vec8us a, vec8us b)
-		{
-			return vec8us(a.a0 * b.a0, a.r0 * b.r0, a.g0 * b.g0, a.b0 * b.b0, a.a1 * b.a1, a.r1 * b.r1, a.g1 * b.g1, a.b1 * b.b1);
-		}
-
-		inline vec8us operator<<(vec8us a, int bits)
-		{
-			return vec8us(a.a0 << bits, a.r0 << bits, a.g0 << bits, a.b0 << bits, a.a1 << bits, a.r1 << bits, a.g1 << bits, a.b1 << bits);
-		}
-
-		inline vec8us operator>>(vec8us a, int bits)
-		{
-			return vec8us(a.a0 >> bits, a.r0 >> bits, a.g0 >> bits, a.b0 >> bits, a.a1 >> bits, a.r1 >> bits, a.g1 >> bits, a.b1 >> bits);
-		}
-	}
+		uint32_t b, g, r, a;
+		BgraColor() { }
+		BgraColor(uint32_t c) : b(BPART(c)), g(GPART(c)), r(RPART(c)), a(APART(c)) { }
+		BgraColor &operator=(uint32_t c) { b = BPART(c); g = GPART(c); r = RPART(c); a = APART(c); return *this; }
+		operator uint32_t() const { return MAKEARGB(a, r, g, b); }
+	};
 }
diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h
index ed34c340fa..97bddc15e2 100644
--- a/src/swrenderer/drawers/r_draw_sky32.h
+++ b/src/swrenderer/drawers/r_draw_sky32.h
@@ -38,11 +38,9 @@ namespace swrenderer
 
 		void Execute(DrawerThread *thread) override
 		{
-			using namespace drawervectors;
-
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int count = args.Count();
-			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
 			int textureheight0 = args.FrontTextureHeight();
 
@@ -87,8 +85,8 @@ namespace swrenderer
 				return;
 			}
 
-			vec8us solid_top_fill = unpack(solid_top, 0);
-			vec8us solid_bottom_fill = unpack(solid_bottom, 0);
+			BgraColor solid_top_fill = solid_top;
+			BgraColor solid_bottom_fill = solid_bottom;
 
 			int index = skipped;
 
@@ -107,12 +105,14 @@ namespace swrenderer
 				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
 				uint32_t fg = source0[sample_index];
 
-				vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
-				vec8us inv_alpha = vec8us(256) - alpha;
+				uint32_t alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
+				uint32_t inv_alpha = 256 - alpha;
 
-				vec8us c = unpack(fg, 0);
-				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
-				*dest = packlo(c);
+				BgraColor c = fg;
+				c.r = (c.r * alpha + solid_top_fill.r * inv_alpha) >> 8;
+				c.g = (c.g * alpha + solid_top_fill.g * inv_alpha) >> 8;
+				c.b = (c.b * alpha + solid_top_fill.b * inv_alpha) >> 8;
+				*dest = c;
 
 				frac += fracstep;
 				dest += pitch;
@@ -136,12 +136,14 @@ namespace swrenderer
 				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
 				uint32_t fg = source0[sample_index];
 
-				vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
-				vec8us inv_alpha = vec8us(256) - alpha;
+				uint32_t alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
+				uint32_t inv_alpha = 256 - alpha;
 
-				vec8us c = unpack(fg, 0);
-				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
-				*dest = packlo(c);
+				BgraColor c = fg;
+				c.r = (c.r * alpha + solid_top_fill.r * inv_alpha) >> 8;
+				c.g = (c.g * alpha + solid_top_fill.g * inv_alpha) >> 8;
+				c.b = (c.b * alpha + solid_top_fill.b * inv_alpha) >> 8;
+				*dest = c;
 
 				frac += fracstep;
 				dest += pitch;
@@ -170,11 +172,9 @@ namespace swrenderer
 
 		void Execute(DrawerThread *thread) override
 		{
-			using namespace drawervectors;
-
 			uint32_t *dest = (uint32_t *)args.Dest();
 			int count = args.Count();
-			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			int pitch = args.Viewport()->RenderTarget->GetPitch();
 			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
 			const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels();
 			int textureheight0 = args.FrontTextureHeight();
@@ -228,8 +228,8 @@ namespace swrenderer
 				return;
 			}
 
-			vec8us solid_top_fill = unpack(solid_top, 0);
-			vec8us solid_bottom_fill = unpack(solid_bottom, 0);
+			BgraColor solid_top_fill = solid_top;
+			BgraColor solid_bottom_fill = solid_bottom;
 
 			int index = skipped;
 
@@ -253,12 +253,14 @@ namespace swrenderer
 					fg = source1[sample_index2];
 				}
 
-				vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
-				vec8us inv_alpha = vec8us(256) - alpha;
+				uint32_t alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
+				uint32_t inv_alpha = 256 - alpha;
 
-				vec8us c = unpack(fg, 0);
-				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
-				*dest = packlo(c);
+				BgraColor c = fg;
+				c.r = (c.r * alpha + solid_top_fill.r * inv_alpha) >> 8;
+				c.g = (c.g * alpha + solid_top_fill.g * inv_alpha) >> 8;
+				c.b = (c.b * alpha + solid_top_fill.b * inv_alpha) >> 8;
+				*dest = c;
 
 				frac += fracstep;
 				dest += pitch;
@@ -293,12 +295,14 @@ namespace swrenderer
 					fg = source1[sample_index2];
 				}
 
-				vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
-				vec8us inv_alpha = vec8us(256) - alpha;
+				uint32_t alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
+				uint32_t inv_alpha = 256 - alpha;
 
-				vec8us c = unpack(fg, 0);
-				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
-				*dest = packlo(c);
+				BgraColor c = fg;
+				c.r = (c.r * alpha + solid_top_fill.r * inv_alpha) >> 8;
+				c.g = (c.g * alpha + solid_top_fill.g * inv_alpha) >> 8;
+				c.b = (c.b * alpha + solid_top_fill.b * inv_alpha) >> 8;
+				*dest = c;
 
 				frac += fracstep;
 				dest += pitch;
diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h
new file mode 100644
index 0000000000..263ed2e3c7
--- /dev/null
+++ b/src/swrenderer/drawers/r_draw_wall32.h
@@ -0,0 +1,348 @@
+/*
+** Drawer commands for walls
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+** claim that you wrote the original software. If you use this software
+** in a product, an acknowledgment in the product documentation would be
+** appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+** misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+#include <cmath>
+#include "swrenderer/drawers/r_draw_rgba.h"
+#include "swrenderer/viewport/r_walldrawer.h"
+
+namespace swrenderer
+{
+	// Tag types passed as template arguments to pick the blend, filter and shade code at compile time.
+	namespace DrawWall32TModes
+	{
+		enum class WallBlendModes { Opaque, Masked, AddClamp, SubClamp, RevSubClamp };
+		struct OpaqueWall { static const int Mode = (int)WallBlendModes::Opaque; };
+		struct MaskedWall { static const int Mode = (int)WallBlendModes::Masked; };
+		struct AddClampWall { static const int Mode = (int)WallBlendModes::AddClamp; };
+		struct SubClampWall { static const int Mode = (int)WallBlendModes::SubClamp; };
+		struct RevSubClampWall { static const int Mode = (int)WallBlendModes::RevSubClamp; };
+
+		enum class FilterModes { Nearest, Linear };
+		struct NearestFilter { static const int Mode = (int)FilterModes::Nearest; };
+		struct LinearFilter { static const int Mode = (int)FilterModes::Linear; };
+
+		enum class ShadeMode { Simple, Advanced };
+		struct SimpleShade { static const int Mode = (int)ShadeMode::Simple; };
+		struct AdvancedShade { static const int Mode = (int)ShadeMode::Advanced; };
+	}
+
+	// Draws a wall column of 32-bit pixels; BlendT is one of the *Wall tags and selects the blend equation.
+	template<typename BlendT>
+	class DrawWall32T : public DrawerCommand
+	{
+	protected:
+		WallDrawerArgs args;
+
+	public:
+		DrawWall32T(const WallDrawerArgs &drawerargs) : args(drawerargs) { }
+
+		// Dispatches to the Loop specialization matching the colormap's shade mode and the texture filter.
+		void Execute(DrawerThread *thread) override
+		{
+			using namespace DrawWall32TModes;
+
+			const uint32_t *source2 = (const uint32_t*)args.TexturePixels2();
+			bool is_nearest_filter = (source2 == nullptr);
+			auto shade_constants = args.ColormapConstants();
+			if (shade_constants.simple_shade)
+			{
+				if (is_nearest_filter)
+					Loop<SimpleShade, NearestFilter>(thread, shade_constants);
+				else
+					Loop<SimpleShade, LinearFilter>(thread, shade_constants);
+			}
+			else
+			{
+				if (is_nearest_filter)
+					Loop<AdvancedShade, NearestFilter>(thread, shade_constants);
+				else
+					Loop<AdvancedShade, LinearFilter>(thread, shade_constants);
+			}
+		}
+
+		// For every pixel this thread draws: sample the texture, shade it, blend with the destination, store.
+		template<typename ShadeModeT, typename FilterModeT>
+		FORCEINLINE void Loop(DrawerThread *thread, ShadeConstants shade_constants)
+		{
+			using namespace DrawWall32TModes;
+
+			const uint32_t *source = (const uint32_t*)args.TexturePixels();
+			const uint32_t *source2 = (const uint32_t*)args.TexturePixels2();
+			int textureheight = args.TextureHeight();
+			uint32_t one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1;
+
+			// Shade constants
+			uint32_t light = 256 - (args.Light() >> (FRACBITS - 8));
+			uint32_t inv_light = 256 - light; // weight of the fade color; light + inv_light == 256
+
+			int inv_desaturate;
+			BgraColor shade_fade, shade_light;
+			int desaturate;
+			if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
+			{
+				inv_desaturate = 256 - shade_constants.desaturate;
+				shade_fade.r = shade_constants.fade_red * inv_light;
+				shade_fade.g = shade_constants.fade_green * inv_light;
+				shade_fade.b = shade_constants.fade_blue * inv_light;
+				shade_light.r = shade_constants.light_red;
+				shade_light.g = shade_constants.light_green;
+				shade_light.b = shade_constants.light_blue;
+				desaturate = shade_constants.desaturate;
+			}
+			else
+			{
+				inv_desaturate = 0;
+				shade_fade.r = 0;
+				shade_fade.g = 0;
+				shade_fade.b = 0;
+				shade_light.r = 0;
+				shade_light.g = 0;
+				shade_light.b = 0;
+				desaturate = 0;
+			}
+
+			int count = args.Count();
+			int pitch = args.Viewport()->RenderTarget->GetPitch();
+			uint32_t fracstep = args.TextureVStep();
+			uint32_t frac = args.TextureVPos();
+			uint32_t texturefracx = args.TextureUPos();
+			uint32_t *dest = (uint32_t*)args.Dest();
+			int dest_y = args.DestY();
+
+			auto lights = args.dc_lights;
+			auto num_lights = args.dc_num_lights;
+			float viewpos_z = args.dc_viewpos.Z + args.dc_viewpos_step.Z * thread->skipped_by_thread(dest_y);
+			float step_viewpos_z = args.dc_viewpos_step.Z * thread->num_cores;
+
+			count = thread->count_for_thread(dest_y, count);
+			if (count <= 0) return;
+			frac += thread->skipped_by_thread(dest_y) * fracstep;
+			dest = thread->dest_for_thread(dest_y, pitch, dest);
+			fracstep *= thread->num_cores;
+			pitch *= thread->num_cores;
+
+			if (FilterModeT::Mode == (int)FilterModes::Linear)
+			{
+				frac -= one / 2;
+			}
+
+			uint32_t srcalpha = args.SrcAlpha() >> (FRACBITS - 8);
+			uint32_t destalpha = args.DestAlpha() >> (FRACBITS - 8);
+
+			for (int index = 0; index < count; index++)
+			{
+				BgraColor bgcolor;
+				if (BlendT::Mode != (int)WallBlendModes::Opaque)
+				{
+					bgcolor = *dest;
+				}
+				else
+				{
+					bgcolor = 0;
+				}
+
+				uint32_t ifgcolor = Sample<FilterModeT>(frac, source, source2, textureheight, one, texturefracx);
+				BgraColor fgcolor = Shade<ShadeModeT>(ifgcolor, light, desaturate, inv_desaturate, shade_fade, shade_light, lights, num_lights, viewpos_z);
+				BgraColor outcolor = Blend(fgcolor, bgcolor, ifgcolor, srcalpha, destalpha);
+
+				*dest = outcolor;
+				dest += pitch;
+				frac += fracstep;
+				viewpos_z += step_viewpos_z;
+			}
+		}
+
+		// Returns the texel at frac: nearest from source, or a 4-bit weighted bilinear mix of source and source2.
+		template<typename FilterModeT>
+		FORCEINLINE BgraColor Sample(uint32_t frac, const uint32_t *source, const uint32_t *source2, int textureheight, uint32_t one, uint32_t texturefracx)
+		{
+			using namespace DrawWall32TModes;
+
+			if (FilterModeT::Mode == (int)FilterModes::Nearest)
+			{
+				int sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS;
+				return source[sample_index];
+			}
+			else
+			{
+				unsigned int frac_y0 = (frac >> FRACBITS) * textureheight;
+				unsigned int frac_y1 = ((frac + one) >> FRACBITS) * textureheight;
+				unsigned int y0 = frac_y0 >> FRACBITS;
+				unsigned int y1 = frac_y1 >> FRACBITS;
+
+				unsigned int p00 = source[y0];
+				unsigned int p01 = source[y1];
+				unsigned int p10 = source2[y0];
+				unsigned int p11 = source2[y1];
+
+				unsigned int inv_b = texturefracx;
+				unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
+				unsigned int a = 16 - inv_a;
+				unsigned int b = 16 - inv_b;
+
+				BgraColor result;
+				result.r = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
+				result.g = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
+				result.b = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
+				result.a = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
+				return result;
+			}
+		}
+
+		// Scales by light (simple) or applies desaturation, fade and light tint (advanced), then calls AddLights.
+		template<typename ShadeModeT>
+		FORCEINLINE BgraColor Shade(BgraColor fgcolor, uint32_t light, uint32_t desaturate, uint32_t inv_desaturate, BgraColor shade_fade, BgraColor shade_light, const DrawerLight *lights, int num_lights, float viewpos_z)
+		{
+			using namespace DrawWall32TModes;
+
+			BgraColor material = fgcolor;
+			if (ShadeModeT::Mode == (int)ShadeMode::Simple)
+			{
+				fgcolor.r = (fgcolor.r * light) >> 8;
+				fgcolor.g = (fgcolor.g * light) >> 8;
+				fgcolor.b = (fgcolor.b * light) >> 8;
+			}
+			else
+			{
+				uint32_t intensity = ((fgcolor.r * 77 + fgcolor.g * 143 + fgcolor.b * 37) >> 8) * desaturate;
+				fgcolor.r = (((shade_fade.r + ((fgcolor.r * inv_desaturate + intensity) >> 8) * light) >> 8) * shade_light.r) >> 8;
+				fgcolor.g = (((shade_fade.g + ((fgcolor.g * inv_desaturate + intensity) >> 8) * light) >> 8) * shade_light.g) >> 8;
+				fgcolor.b = (((shade_fade.b + ((fgcolor.b * inv_desaturate + intensity) >> 8) * light) >> 8) * shade_light.b) >> 8;
+			}
+
+			return AddLights(material, fgcolor, lights, num_lights, viewpos_z);
+		}
+
+		// Sums the attenuated color of every entry in lights and adds material * lit to fgcolor, clamped to 255.
+		FORCEINLINE BgraColor AddLights(BgraColor material, BgraColor fgcolor, const DrawerLight *lights, int num_lights, float viewpos_z)
+		{
+			using namespace DrawWall32TModes;
+
+			BgraColor lit;
+			lit.r = 0;
+			lit.g = 0;
+			lit.b = 0;
+
+			for (int i = 0; i != num_lights; i++)
+			{
+				float light_x = lights[i].x;
+				float light_y = lights[i].y;
+				float light_z = lights[i].z;
+				float light_radius = lights[i].radius;
+
+				// L = light-pos
+				// dist = sqrt(dot(L, L))
+				// distance_attenuation = 1 - MIN(dist * (1/radius), 1)
+				float Lxy2 = light_x; // L.x*L.x + L.y*L.y
+				float Lz = light_z - viewpos_z;
+				float dist2 = Lxy2 + Lz * Lz;
+				// Portable reciprocal square root; the earlier _mm_rsqrt_ss call needed SSE.
+				float rcp_dist = 1.0f / std::sqrt(dist2);
+				float dist = dist2 * rcp_dist;
+				float distance_attenuation = 256.0f - MIN(dist * light_radius, 256.0f);
+
+				// The simple light type
+				float simple_attenuation = distance_attenuation;
+
+				// The point light type
+				// diffuse = dot(N,L) * attenuation
+				float point_attenuation = light_y * rcp_dist * distance_attenuation;
+
+				uint32_t attenuation = (int32_t)((light_y == 0.0f) ? simple_attenuation : point_attenuation);
+
+				BgraColor light_color = lights[i].color;
+
+				lit.r += (light_color.r * attenuation) >> 8;
+				lit.g += (light_color.g * attenuation) >> 8;
+				lit.b += (light_color.b * attenuation) >> 8;
+			}
+
+			fgcolor.r = MIN<uint32_t>(fgcolor.r + ((material.r * lit.r) >> 8), 255);
+			fgcolor.g = MIN<uint32_t>(fgcolor.g + ((material.g * lit.g) >> 8), 255);
+			fgcolor.b = MIN<uint32_t>(fgcolor.b + ((material.b * lit.b) >> 8), 255);
+			return fgcolor;
+		}
+
+		// Combines fgcolor with bgcolor according to BlendT; ifgcolor is the unshaded texel whose alpha drives the blend.
+		FORCEINLINE BgraColor Blend(BgraColor fgcolor, BgraColor bgcolor, unsigned int ifgcolor, uint32_t srcalpha, uint32_t destalpha)
+		{
+			using namespace DrawWall32TModes;
+
+			if (BlendT::Mode == (int)WallBlendModes::Opaque)
+			{
+				return fgcolor;
+			}
+			else if (BlendT::Mode == (int)WallBlendModes::Masked)
+			{
+				return (ifgcolor == 0) ? bgcolor : fgcolor;
+			}
+			else
+			{
+				uint32_t alpha = APART(ifgcolor);
+				alpha += alpha >> 7; // 255->256
+				uint32_t inv_alpha = 256 - alpha;
+
+				uint32_t bgalpha = (destalpha * alpha + (inv_alpha << 8) + 128) >> 8;
+				uint32_t fgalpha = (srcalpha * alpha + 128) >> 8;
+
+				fgcolor.r *= fgalpha;
+				fgcolor.g *= fgalpha;
+				fgcolor.b *= fgalpha;
+				bgcolor.r *= bgalpha;
+				bgcolor.g *= bgalpha;
+				bgcolor.b *= bgalpha;
+
+				BgraColor outcolor;
+				if (BlendT::Mode == (int)WallBlendModes::AddClamp)
+				{
+					outcolor.r = MIN<uint32_t>((fgcolor.r + bgcolor.r) >> 8, 255);
+					outcolor.g = MIN<uint32_t>((fgcolor.g + bgcolor.g) >> 8, 255);
+					outcolor.b = MIN<uint32_t>((fgcolor.b + bgcolor.b) >> 8, 255);
+				}
+				else if (BlendT::Mode == (int)WallBlendModes::SubClamp)
+				{
+					outcolor.r = MAX(int32_t(fgcolor.r - bgcolor.r) >> 8, 0);
+					outcolor.g = MAX(int32_t(fgcolor.g - bgcolor.g) >> 8, 0);
+					outcolor.b = MAX(int32_t(fgcolor.b - bgcolor.b) >> 8, 0);
+				}
+				else if (BlendT::Mode == (int)WallBlendModes::RevSubClamp)
+				{
+					outcolor.r = MAX(int32_t(bgcolor.r - fgcolor.r) >> 8, 0);
+					outcolor.g = MAX(int32_t(bgcolor.g - fgcolor.g) >> 8, 0);
+					outcolor.b = MAX(int32_t(bgcolor.b - fgcolor.b) >> 8, 0);
+				}
+				outcolor.a = 255;
+				return outcolor;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawWall32T"; }
+	};
+
+	typedef DrawWall32T<DrawWall32TModes::OpaqueWall> DrawWall32Command;
+	typedef DrawWall32T<DrawWall32TModes::MaskedWall> DrawWallMasked32Command;
+	typedef DrawWall32T<DrawWall32TModes::AddClampWall> DrawWallAddClamp32Command;
+	typedef DrawWall32T<DrawWall32TModes::SubClampWall> DrawWallSubClamp32Command;
+	typedef DrawWall32T<DrawWall32TModes::RevSubClampWall> DrawWallRevSubClamp32Command;
+}