diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp
index 48bbc2c38..d90c057a3 100644
--- a/src/swrenderer/drawers/r_draw_rgba.cpp
+++ b/src/swrenderer/drawers/r_draw_rgba.cpp
@@ -39,10 +39,17 @@
 #include "gl/data/gl_matrix.h"
 #include "swrenderer/viewport/r_viewport.h"
 #include "swrenderer/scene/r_light.h"
+#ifdef NO_SSE
+#include "r_draw_wall32.h"
+#include "r_draw_sprite32.h"
+#include "r_draw_span32.h"
+#include "r_draw_sky32.h"
+#else
 #include "r_draw_wall32_sse2.h"
 #include "r_draw_sprite32_sse2.h"
 #include "r_draw_span32_sse2.h"
 #include "r_draw_sky32_sse2.h"
+#endif
 #include "gi.h"
 #include "stats.h"
 
diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h
index 3e95c8cb6..263a5a643 100644
--- a/src/swrenderer/drawers/r_draw_rgba.h
+++ b/src/swrenderer/drawers/r_draw_rgba.h
@@ -387,4 +387,81 @@ namespace swrenderer
 			return alpha | (red << 16) | (green << 8) | blue;
 		}
 	};
+
+	/////////////////////////////////////////////////////////////////////////////
+	// Vector classes for non-SSE drawers that behave like their SSE counterparts
+
+	namespace drawervectors
+	{
+		// Four 32-bit lanes: the a, r, g, b channels of one pixel.
+		struct vec4ui
+		{
+			vec4ui() {}
+			vec4ui(uint32_t v) : a(v), r(v), g(v), b(v) { }
+			vec4ui(uint32_t a, uint32_t r, uint32_t g, uint32_t b) : a(a), r(r), g(g), b(b) { }
+			uint32_t a, r, g, b;
+		};
+
+		// Eight 16-bit lanes: the a, r, g, b channels of two pixels (0 = low, 1 = high).
+		struct vec8us
+		{
+			vec8us() {}
+			// Broadcast v to all eight lanes. The operators below read the high
+			// pixel as well, so it must not be left uninitialized.
+			vec8us(uint16_t v) : a0(v), r0(v), g0(v), b0(v), a1(v), r1(v), g1(v), b1(v) { }
+			vec8us(uint16_t a0, uint16_t r0, uint16_t g0, uint16_t b0, uint16_t a1, uint16_t r1, uint16_t g1, uint16_t b1) : a0(a0), r0(r0), g0(g0), b0(b0), a1(a1), r1(r1), g1(g1), b1(b1) { }
+			uint16_t a0, r0, g0, b0, a1, r1, g1, b1;
+		};
+
+		// Split two 32-bit colors into 16-bit channels: lo fills lanes 0, hi fills lanes 1.
+		inline vec8us unpack(uint32_t lo, uint32_t hi) { return vec8us(APART(lo), RPART(lo), GPART(lo), BPART(lo), APART(hi), RPART(hi), GPART(hi), BPART(hi)); }
+		// Widen the low (unpacklo) or high (unpackhi) pixel of v to 32-bit lanes.
+		inline vec4ui unpacklo(vec8us v) { return vec4ui(v.a0, v.r0, v.g0, v.b0); }
+		inline vec4ui unpackhi(vec8us v) { return vec4ui(v.a1, v.r1, v.g1, v.b1); }
+
+		// Merge two pixels into one vec8us; each channel is narrowed to 16 bits.
+		inline vec8us pack(vec4ui lo, vec4ui hi)
+		{
+			return vec8us(lo.a, lo.r, lo.g, lo.b, hi.a, hi.r, hi.g, hi.b);
+		}
+
+		// Build a 32-bit color from the low pixel of v, clamping each channel to 0..255.
+		inline uint32_t packlo(vec8us v)
+		{
+			return MAKEARGB((uint32_t)clamp(v.a0, 0, 255), (uint32_t)clamp(v.r0, 0, 255), (uint32_t)clamp(v.g0, 0, 255), (uint32_t)clamp(v.b0, 0, 255));
+		}
+
+		// Build a 32-bit color from the high pixel of v, clamping each channel to 0..255.
+		inline uint32_t packhi(vec8us v)
+		{
+			return MAKEARGB((uint32_t)clamp(v.a1, 0, 255), (uint32_t)clamp(v.r1, 0, 255), (uint32_t)clamp(v.g1, 0, 255), (uint32_t)clamp(v.b1, 0, 255));
+		}
+
+		// Lane-wise arithmetic. Operands are promoted to int and each result is
+		// narrowed back to 16 bits when the vec8us is constructed.
+		inline vec8us operator+(vec8us a, vec8us b)
+		{
+			return vec8us(a.a0 + b.a0, a.r0 + b.r0, a.g0 + b.g0, a.b0 + b.b0, a.a1 + b.a1, a.r1 + b.r1, a.g1 + b.g1, a.b1 + b.b1);
+		}
+
+		inline vec8us operator-(vec8us a, vec8us b)
+		{
+			return vec8us(a.a0 - b.a0, a.r0 - b.r0, a.g0 - b.g0, a.b0 - b.b0, a.a1 - b.a1, a.r1 - b.r1, a.g1 - b.g1, a.b1 - b.b1);
+		}
+
+		inline vec8us operator*(vec8us a, vec8us b)
+		{
+			return vec8us(a.a0 * b.a0, a.r0 * b.r0, a.g0 * b.g0, a.b0 * b.b0, a.a1 * b.a1, a.r1 * b.r1, a.g1 * b.g1, a.b1 * b.b1);
+		}
+
+		inline vec8us operator<<(vec8us a, int bits)
+		{
+			return vec8us(a.a0 << bits, a.r0 << bits, a.g0 << bits, a.b0 << bits, a.a1 << bits, a.r1 << bits, a.g1 << bits, a.b1 << bits);
+		}
+
+		inline vec8us operator>>(vec8us a, int bits)
+		{
+			return vec8us(a.a0 >> bits, a.r0 >> bits, a.g0 >> bits, a.b0 >> bits, a.a1 >> bits, a.r1 >> bits, a.g1 >> bits, a.b1 >> bits);
+		}
+	}
 }
diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h
new file mode 100644
index 000000000..ed34c340f
--- /dev/null
+++ b/src/swrenderer/drawers/r_draw_sky32.h
@@ -0,0 +1,330 @@
+/*
+** Drawer commands for spans
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+#include "swrenderer/drawers/r_draw_rgba.h"
+#include "swrenderer/viewport/r_skydrawer.h"
+
+namespace swrenderer
+{
+
+	// Draws one column of a single-layer sky to a 32-bit destination, optionally
+	// fading the texture into the solid top and bottom colors.
+	class DrawSkySingle32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			using namespace drawervectors;
+
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			// Rows are interleaved across threads, so each step below advances num_cores rows.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			if (!fadeSky)
+			{
+				// No fading: copy texels straight from the texture.
+				count = thread->count_for_thread(args.DestY(), count);
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					*dest = source0[sample_index];
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			vec8us solid_top_fill = unpack(solid_top, 0);
+			vec8us solid_bottom_fill = unpack(solid_bottom, 0);
+
+			int index = skipped;
+
+			// Top solid color:
+			while (index < start_fadetop_y)
+			{
+				*dest = solid_top;
+				dest += pitch;
+				frac += fracstep;
+				index += num_cores;
+			}
+
+			// Top fade:
+			while (index < end_fadetop_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
+				vec8us inv_alpha = vec8us(256) - alpha;
+
+				vec8us c = unpack(fg, 0);
+				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
+				*dest = packlo(c);
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Textured center:
+			while (index < start_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				*dest = source0[sample_index];
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Fade bottom:
+			while (index < end_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
+				vec8us inv_alpha = vec8us(256) - alpha;
+
+				vec8us c = unpack(fg, 0);
+				// Blend towards the bottom color here; the top fade above uses solid_top_fill.
+				c = (c * alpha + solid_bottom_fill * inv_alpha) >> 8;
+				*dest = packlo(c);
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Bottom solid color:
+			while (index < count)
+			{
+				*dest = solid_bottom;
+				dest += pitch;
+				index += num_cores;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSkySingle32Command"; }
+	};
+
+	// Draws one column of a two-layer sky to a 32-bit destination. Front texels
+	// that are exactly 0 are replaced by the back texture, sampled at the same
+	// index limited to the back texture's height - 1.
+	class DrawSkyDouble32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		void Execute(DrawerThread *thread) override
+		{
+			using namespace drawervectors;
+
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+			uint32_t maxtextureheight1 = args.BackTextureHeight() - 1;
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			// Rows are interleaved across threads, so each step below advances num_cores rows.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			if (!fadeSky)
+			{
+				// No fading: copy texels straight from the front/back textures.
+				count = thread->count_for_thread(args.DestY(), count);
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					uint32_t fg = source0[sample_index];
+					if (fg == 0)
+					{
+						uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+						fg = source1[sample_index2];
+					}
+
+					*dest = fg;
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			vec8us solid_top_fill = unpack(solid_top, 0);
+			vec8us solid_bottom_fill = unpack(solid_bottom, 0);
+
+			int index = skipped;
+
+			// Top solid color:
+			while (index < start_fadetop_y)
+			{
+				*dest = solid_top;
+				dest += pitch;
+				frac += fracstep;
+				index += num_cores;
+			}
+
+			// Top fade:
+			while (index < end_fadetop_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+
+				vec8us alpha = MAX(MIN(frac >> (16 - start_fade), 256), 0);
+				vec8us inv_alpha = vec8us(256) - alpha;
+
+				vec8us c = unpack(fg, 0);
+				c = (c * alpha + solid_top_fill * inv_alpha) >> 8;
+				*dest = packlo(c);
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Textured center:
+			while (index < start_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+				*dest = fg;
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Fade bottom:
+			while (index < end_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+
+				vec8us alpha = MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0);
+				vec8us inv_alpha = vec8us(256) - alpha;
+
+				vec8us c = unpack(fg, 0);
+				// Blend towards the bottom color here; the top fade above uses solid_top_fill.
+				c = (c * alpha + solid_bottom_fill * inv_alpha) >> 8;
+				*dest = packlo(c);
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Bottom solid color:
+			while (index < count)
+			{
+				*dest = solid_bottom;
+				dest += pitch;
+				index += num_cores;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSkyDouble32Command"; }
+	};
+}