diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp
index 532aa09400..f60f1447c4 100644
--- a/src/swrenderer/drawers/r_draw_rgba.cpp
+++ b/src/swrenderer/drawers/r_draw_rgba.cpp
@@ -43,6 +43,7 @@
 #include "r_draw_wall32.h"
 #include "r_draw_sprite32.h"
 #include "r_draw_span32.h"
+#include "r_draw_sky32.h"
 #include "gi.h"
 #include "stats.h"
 
@@ -294,6 +295,28 @@ namespace swrenderer
 		else
 			Queue->Push(args);
 	}
+
+	// Queues the drawer command for a sky column that uses a single texture.
+	// NOTE(review): both branches push the same call as written; presumably each
+	// should name a different command type (see r_draw_sky32.h) - confirm.
+	void SWTruecolorDrawers::DrawSingleSkyColumn(const SkyDrawerArgs &args)
+	{
+		if (r_phpdrawers)
+			Queue->Push(args);
+		else
+			Queue->Push(args);
+	}
+
+	// Queues the drawer command for a sky column that uses two textures.
+	// NOTE(review): both branches push the same call as written; presumably each
+	// should name a different command type (see r_draw_sky32.h) - confirm.
+	void SWTruecolorDrawers::DrawDoubleSkyColumn(const SkyDrawerArgs &args)
+	{
+		if (r_phpdrawers)
+			Queue->Push(args);
+		else
+			Queue->Push(args);
+	}
 
 	DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs)
 	{
diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h
index 52452fab05..a3ec7e5f94 100644
--- a/src/swrenderer/drawers/r_draw_rgba.h
+++ b/src/swrenderer/drawers/r_draw_rgba.h
@@ -365,8 +365,8 @@ namespace swrenderer
 		void DrawWallAddClampColumn(const WallDrawerArgs &args) override;
 		void DrawWallSubClampColumn(const WallDrawerArgs &args) override;
 		void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override;
-		void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); }
-		void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); }
+		void DrawSingleSkyColumn(const SkyDrawerArgs &args) override;
+		void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override;
 		void DrawColumn(const SpriteDrawerArgs &args) override;
 		void FillColumn(const SpriteDrawerArgs &args) override;
 		void FillAddColumn(const SpriteDrawerArgs &args) override;
diff --git a/src/swrenderer/drawers/r_draw_sky32.h b/src/swrenderer/drawers/r_draw_sky32.h
new file mode 100644
index 0000000000..f047c77957
--- /dev/null
+++ b/src/swrenderer/drawers/r_draw_sky32.h
@@ -0,0 +1,346 @@
+/*
+** Drawer commands for sky columns
+** Copyright (c) 2016 Magnus Norddahl
+**
+** This software is provided 'as-is', without any express or implied
+** warranty. In no event will the authors be held liable for any damages
+** arising from the use of this software.
+**
+** Permission is granted to anyone to use this software for any purpose,
+** including commercial applications, and to alter it and redistribute it
+** freely, subject to the following restrictions:
+**
+** 1. The origin of this software must not be misrepresented; you must not
+**    claim that you wrote the original software. If you use this software
+**    in a product, an acknowledgment in the product documentation would be
+**    appreciated but is not required.
+** 2. Altered source versions must be plainly marked as such, and must not be
+**    misrepresented as being the original software.
+** 3. This notice may not be removed or altered from any source distribution.
+**
+*/
+
+#pragma once
+
+#include "swrenderer/drawers/r_draw_rgba.h"
+#include "swrenderer/viewport/r_skydrawer.h"
+
+namespace swrenderer
+{
+	// Draws one vertical sky column from a single 32-bit texture.
+	//
+	// With args.FadeSky() off, every row is a plain texture sample. With it on,
+	// the column is split into five bands: solid top color, fade from the top
+	// color into the texture, plain texture, fade from the texture into the
+	// bottom color, and solid bottom color.
+	class DrawSkySingle32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkySingle32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		// Draws this thread's share of the column: it starts at the row offset
+		// reported by thread->skipped_by_thread() and then advances dest, frac
+		// and the row index by thread->num_cores rows per step.
+		void Execute(DrawerThread *thread) override
+		{
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			// NOTE(review): these divisions assume fracstep != 0 - confirm callers guarantee it.
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			// Move to this thread's first row and widen the strides to num_cores rows.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			// No fade requested: plain texture samples for every row of this thread.
+			if (!fadeSky)
+			{
+				count = thread->count_for_thread(args.DestY(), count);
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					*dest = source0[sample_index];
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			// Solid colors widened to 16 bits per channel for the blends below.
+			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
+			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
+
+			int index = skipped;
+
+			// Top solid color:
+			while (index < start_fadetop_y)
+			{
+				*dest = solid_top;
+				dest += pitch;
+				frac += fracstep;
+				index += num_cores;
+			}
+
+			// Top fade:
+			while (index < end_fadetop_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				// alpha runs from 0 (pure top color) to 256 (pure texture).
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(frac >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_top_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Textured center:
+			while (index < start_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				*dest = source0[sample_index];
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Fade bottom:
+			while (index < end_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+
+				// alpha runs from 256 (pure texture) down to 0 (pure bottom color).
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				// Blend against the bottom color so the fade meets the solid band below it.
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_bottom_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Bottom solid color:
+			while (index < count)
+			{
+				*dest = solid_bottom;
+				dest += pitch;
+				index += num_cores;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSkySingle32Command"; }
+	};
+
+	// Draws one vertical sky column from a front and a back 32-bit texture.
+	//
+	// Works like DrawSkySingle32Command, except that whenever the front texel is
+	// 0 the back texture is sampled instead, with the row index clamped to the
+	// back texture's last row.
+	class DrawSkyDouble32Command : public DrawerCommand
+	{
+	protected:
+		SkyDrawerArgs args;
+
+	public:
+		DrawSkyDouble32Command(const SkyDrawerArgs &args) : args(args) { }
+
+		// Draws this thread's share of the column: it starts at the row offset
+		// reported by thread->skipped_by_thread() and then advances dest, frac
+		// and the row index by thread->num_cores rows per step.
+		void Execute(DrawerThread *thread) override
+		{
+			uint32_t *dest = (uint32_t *)args.Dest();
+			int count = args.Count();
+			int pitch = RenderViewport::Instance()->RenderTarget->GetPitch();
+			const uint32_t *source0 = (const uint32_t *)args.FrontTexturePixels();
+			const uint32_t *source1 = (const uint32_t *)args.BackTexturePixels();
+			int textureheight0 = args.FrontTextureHeight();
+			uint32_t maxtextureheight1 = args.BackTextureHeight() - 1;
+
+			int32_t frac = args.TextureVPos();
+			int32_t fracstep = args.TextureVStep();
+
+			uint32_t solid_top = args.SolidTopColor();
+			uint32_t solid_bottom = args.SolidBottomColor();
+			bool fadeSky = args.FadeSky();
+
+			// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
+			// NOTE(review): these divisions assume fracstep != 0 - confirm callers guarantee it.
+			int start_fade = 2; // How fast it should fade out
+			int fade_length = (1 << (24 - start_fade));
+			int start_fadetop_y = (-frac) / fracstep;
+			int end_fadetop_y = (fade_length - frac) / fracstep;
+			int start_fadebottom_y = ((2 << 24) - fade_length - frac) / fracstep;
+			int end_fadebottom_y = ((2 << 24) - frac) / fracstep;
+			start_fadetop_y = clamp(start_fadetop_y, 0, count);
+			end_fadetop_y = clamp(end_fadetop_y, 0, count);
+			start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
+			end_fadebottom_y = clamp(end_fadebottom_y, 0, count);
+
+			// Move to this thread's first row and widen the strides to num_cores rows.
+			int num_cores = thread->num_cores;
+			int skipped = thread->skipped_by_thread(args.DestY());
+			dest = thread->dest_for_thread(args.DestY(), pitch, dest);
+			frac += fracstep * skipped;
+			fracstep *= num_cores;
+			pitch *= num_cores;
+
+			// No fade requested: plain texture samples for every row of this thread.
+			if (!fadeSky)
+			{
+				count = thread->count_for_thread(args.DestY(), count);
+
+				for (int index = 0; index < count; index++)
+				{
+					uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+					uint32_t fg = source0[sample_index];
+					// A front texel of 0 falls back to the back texture (row clamped to its last row).
+					if (fg == 0)
+					{
+						uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+						fg = source1[sample_index2];
+					}
+
+					*dest = fg;
+					dest += pitch;
+					frac += fracstep;
+				}
+
+				return;
+			}
+
+			// Solid colors widened to 16 bits per channel for the blends below.
+			__m128i solid_top_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_top), _mm_setzero_si128());
+			__m128i solid_bottom_fill = _mm_unpacklo_epi8(_mm_cvtsi32_si128(solid_bottom), _mm_setzero_si128());
+
+			int index = skipped;
+
+			// Top solid color:
+			while (index < start_fadetop_y)
+			{
+				*dest = solid_top;
+				dest += pitch;
+				frac += fracstep;
+				index += num_cores;
+			}
+
+			// Top fade:
+			while (index < end_fadetop_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+
+				// alpha runs from 0 (pure top color) to 256 (pure texture).
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(frac >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_top_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Textured center:
+			while (index < start_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+				*dest = fg;
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Fade bottom:
+			while (index < end_fadebottom_y)
+			{
+				uint32_t sample_index = (((((uint32_t)frac) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
+				uint32_t fg = source0[sample_index];
+				if (fg == 0)
+				{
+					uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
+					fg = source1[sample_index2];
+				}
+
+				// alpha runs from 256 (pure texture) down to 0 (pure bottom color).
+				__m128i alpha = _mm_set1_epi16(MAX(MIN(((2 << 24) - frac) >> (16 - start_fade), 256), 0));
+				__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
+
+				// Blend against the bottom color so the fade meets the solid band below it.
+				__m128i c = _mm_unpacklo_epi8(_mm_cvtsi32_si128(fg), _mm_setzero_si128());
+				c = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(c, alpha), _mm_mullo_epi16(solid_bottom_fill, inv_alpha)), 8);
+				*dest = _mm_cvtsi128_si32(_mm_packus_epi16(c, _mm_setzero_si128()));
+
+				frac += fracstep;
+				dest += pitch;
+				index += num_cores;
+			}
+
+			// Bottom solid color:
+			while (index < count)
+			{
+				*dest = solid_bottom;
+				dest += pitch;
+				index += num_cores;
+			}
+		}
+
+		FString DebugInfo() override { return "DrawSkyDouble32Command"; }
+	};
+}