diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 699352a6b0..bbedb2622a 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -60,18 +60,14 @@ CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG namespace swrenderer { + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; -extern "C" short spanend[MAXHEIGHT]; -extern float rw_light; -extern float rw_lightstep; -extern int wallshade; + ///////////////////////////////////////////////////////////////////////////// -///////////////////////////////////////////////////////////////////////////// - -class DrawSpanLLVMCommand : public DrawerCommand -{ -public: - DrawSpanLLVMCommand() + DrawSpanLLVMCommand::DrawSpanLLVMCommand() { using namespace drawerargs; @@ -106,23 +102,19 @@ public: args.flags |= DrawSpanArgs::nearest_filter; } - void Execute(DrawerThread *thread) override + void DrawSpanLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpan(&args); } - FString DebugInfo() override + FString DrawSpanLLVMCommand::DebugInfo() { return "DrawSpan\n" + args.ToString(); } -protected: - DrawSpanArgs args; - -private: - inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) + bool DrawSpanLLVMCommand::sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped) { using namespace drawerargs; @@ -144,71 +136,47 @@ private: return (magnifying && r_magfilter) || (!magnifying && r_minfilter); } -}; -class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + ///////////////////////////////////////////////////////////////////////////// + + void DrawSpanMaskedLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMasked(&args); } -}; -class DrawSpanTranslucentLLVMCommand : 
public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanTranslucentLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanTranslucent(&args); } -}; -class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanMaskedTranslucentLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMaskedTranslucent(&args); } -}; -class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanAddClampLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanAddClamp(&args); } -}; -class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand -{ -public: - void Execute(DrawerThread *thread) override + void DrawSpanMaskedAddClampLLVMCommand::Execute(DrawerThread *thread) { if (thread->skipped_by_thread(args.y)) return; Drawers::Instance()->DrawSpanMaskedAddClamp(&args); } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawWall4LLVMCommand : public DrawerCommand -{ -protected: - DrawWallArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawWall4LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -218,8 +186,7 @@ protected: return d; } -public: - DrawWall4LLVMCommand() + DrawWall4LLVMCommand::DrawWall4LLVMCommand() { using namespace drawerargs; @@ -257,24 +224,20 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawWall4LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); 
Drawers::Instance()->vlinec4(&args, &d); } - FString DebugInfo() override + FString DrawWall4LLVMCommand::DebugInfo() { return "DrawWall4\n" + args.ToString(); } -}; -class DrawWall1LLVMCommand : public DrawerCommand -{ -protected: - DrawWallArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -284,8 +247,7 @@ protected: return d; } -public: - DrawWall1LLVMCommand() + DrawWall1LLVMCommand::DrawWall1LLVMCommand() { using namespace drawerargs; @@ -320,24 +282,20 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawWall1LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->vlinec1(&args, &d); } - FString DebugInfo() override + FString DrawWall1LLVMCommand::DebugInfo() { return "DrawWall1\n" + args.ToString(); } -}; -class DrawColumnLLVMCommand : public DrawerCommand -{ -protected: - DrawColumnArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawColumnLLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -347,13 +305,12 @@ protected: return d; } - FString DebugInfo() override + FString DrawColumnLLVMCommand::DebugInfo() { return "DrawColumn\n" + args.ToString(); } -public: - DrawColumnLLVMCommand() + DrawColumnLLVMCommand::DrawColumnLLVMCommand() { using namespace drawerargs; @@ -393,19 +350,15 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawColumnLLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->DrawColumn(&args, &d); } -}; -class DrawSkyLLVMCommand : public 
DrawerCommand -{ -protected: - DrawSkyArgs args; + ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawSkyLLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -415,8 +368,7 @@ protected: return d; } -public: - DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) + DrawSkyLLVMCommand::DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom) { using namespace drawerargs; @@ -439,70 +391,14 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - FString DebugInfo() override + FString DrawSkyLLVMCommand::DebugInfo() { return "DrawSky\n" + args.ToString(); } -}; -#define DECLARE_DRAW_COMMAND(name, func, base) \ -class name##LLVMCommand : public base \ -{ \ -public: \ - using base::base; \ - void Execute(DrawerThread *thread) override \ - { \ - WorkerThreadData d = ThreadData(thread); \ - Drawers::Instance()->func(&args, &d); \ - } \ -}; + ///////////////////////////////////////////////////////////////////////////// -//DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand); - -DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAdd, 
DrawColumnAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); -DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); -DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); - -///////////////////////////////////////////////////////////////////////////// - -class DrawFuzzColumnRGBACommand : public DrawerCommand -{ - int _x; - int _yl; - int _yh; - BYTE * RESTRICT _destorg; - int _pitch; - int _fuzzpos; - int _fuzzviewheight; - -public: - DrawFuzzColumnRGBACommand() + DrawFuzzColumnRGBACommand::DrawFuzzColumnRGBACommand() { using namespace drawerargs; @@ -515,7 +411,7 @@ public: 
_fuzzviewheight = fuzzviewheight; } - void Execute(DrawerThread *thread) override + void DrawFuzzColumnRGBACommand::Execute(DrawerThread *thread) { int yl = MAX(_yl, 1); int yh = MIN(_yh, _fuzzviewheight); @@ -605,23 +501,14 @@ public: } } - FString DebugInfo() override + FString DrawFuzzColumnRGBACommand::DebugInfo() { return "DrawFuzzColumn"; } -}; -class FillSpanRGBACommand : public DrawerCommand -{ - int _x1; - int _x2; - int _y; - BYTE * RESTRICT _destorg; - fixed_t _light; - int _color; + ///////////////////////////////////////////////////////////////////////////// -public: - FillSpanRGBACommand() + FillSpanRGBACommand::FillSpanRGBACommand() { using namespace drawerargs; @@ -633,7 +520,7 @@ public: _color = ds_color; } - void Execute(DrawerThread *thread) override + void FillSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -646,30 +533,14 @@ public: dest[i] = color; } - FString DebugInfo() override + FString FillSpanRGBACommand::DebugInfo() { return "FillSpan"; } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawSlabRGBACommand : public DrawerCommand -{ - int _dx; - fixed_t _v; - int _dy; - fixed_t _vi; - const BYTE *_voxelptr; - uint32_t *_p; - ShadeConstants _shade_constants; - const BYTE *_colormap; - fixed_t _light; - int _pitch; - int _start_y; - -public: - DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p, ShadeConstants shade_constants, const BYTE *colormap, fixed_t light) + DrawSlabRGBACommand::DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, fixed_t light) { using namespace drawerargs; @@ -687,16 +558,16 @@ public: assert(dx > 0); } - void Execute(DrawerThread *thread) override + void DrawSlabRGBACommand::Execute(DrawerThread *thread) { int dx = 
_dx; fixed_t v = _v; int dy = _dy; fixed_t vi = _vi; - const BYTE *vptr = _voxelptr; + const uint8_t *vptr = _voxelptr; uint32_t *p = _p; ShadeConstants shade_constants = _shade_constants; - const BYTE *colormap = _colormap; + const uint8_t *colormap = _colormap; uint32_t light = LightBgra::calc_light_multiplier(_light); int pitch = _pitch; int x; @@ -772,25 +643,14 @@ public: } } - FString DebugInfo() override + FString DrawSlabRGBACommand::DebugInfo() { return "DrawSlab"; } -}; -///////////////////////////////////////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////// -class DrawFogBoundaryLineRGBACommand : public DrawerCommand -{ - int _y; - int _x; - int _x2; - BYTE * RESTRICT _destorg; - fixed_t _light; - ShadeConstants _shade_constants; - -public: - DrawFogBoundaryLineRGBACommand(int y, int x, int x2) + DrawFogBoundaryLineRGBACommand::DrawFogBoundaryLineRGBACommand(int y, int x, int x2) { using namespace drawerargs; @@ -803,7 +663,7 @@ public: _shade_constants = dc_shade_constants; } - void Execute(DrawerThread *thread) override + void DrawFogBoundaryLineRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -853,34 +713,14 @@ public: } while (++x <= x2); } - FString DebugInfo() override + FString DrawFogBoundaryLineRGBACommand::DebugInfo() { return "DrawFogBoundaryLine"; } -}; -class DrawTiltedSpanRGBACommand : public DrawerCommand -{ - int _x1; - int _x2; - int _y; - BYTE * RESTRICT _destorg; - fixed_t _light; - ShadeConstants _shade_constants; - FVector3 _plane_sz; - FVector3 _plane_su; - FVector3 _plane_sv; - bool _plane_shade; - int _planeshade; - float _planelightfloat; - fixed_t _pviewx; - fixed_t _pviewy; - int _xbits; - int _ybits; - const uint32_t * RESTRICT _source; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const 
FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + DrawTiltedSpanRGBACommand::DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) { using namespace drawerargs; @@ -903,7 +743,7 @@ public: _ybits = ds_ybits; } - void Execute(DrawerThread *thread) override + void DrawTiltedSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -1009,23 +849,14 @@ public: } } - FString DebugInfo() override + FString DrawTiltedSpanRGBACommand::DebugInfo() { return "DrawTiltedSpan"; } -}; -class DrawColoredSpanRGBACommand : public DrawerCommand -{ - int _y; - int _x1; - int _x2; - BYTE * RESTRICT _destorg; - fixed_t _light; - int _color; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawColoredSpanRGBACommand(int y, int x1, int x2) + DrawColoredSpanRGBACommand::DrawColoredSpanRGBACommand(int y, int x1, int x2) { using namespace drawerargs; @@ -1038,7 +869,7 @@ public: _color = ds_color; } - void Execute(DrawerThread *thread) override + void DrawColoredSpanRGBACommand::Execute(DrawerThread *thread) { if (thread->line_skipped_by_thread(_y)) return; @@ -1055,25 +886,14 @@ public: dest[i] = color; } - FString DebugInfo() override + FString DrawColoredSpanRGBACommand::DebugInfo() { return "DrawColoredSpan"; } -}; -class FillTransColumnRGBACommand : public DrawerCommand -{ - int _x; - int _y1; - int _y2; - int _color; - int _a; - BYTE * RESTRICT _destorg; - int _pitch; - fixed_t _light; + ///////////////////////////////////////////////////////////////////////////// -public: - FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) + FillTransColumnRGBACommand::FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a) { using namespace 
drawerargs; @@ -1087,7 +907,7 @@ public: _pitch = dc_pitch; } - void Execute(DrawerThread *thread) override + void FillTransColumnRGBACommand::Execute(DrawerThread *thread) { int x = _x; int y1 = _y1; @@ -1129,553 +949,163 @@ public: } } - FString DebugInfo() override + FString FillTransColumnRGBACommand::DebugInfo() { return "FillTransColumn"; } -}; -ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) -{ - buffer = screen->GetBuffer(); - pitch = screen->GetPitch(); - width = screen->GetWidth(); - height = screen->GetHeight(); + ///////////////////////////////////////////////////////////////////////////// - start_red = (int)(colormap->ColorizeStart[0] * 255); - start_green = (int)(colormap->ColorizeStart[1] * 255); - start_blue = (int)(colormap->ColorizeStart[2] * 255); - end_red = (int)(colormap->ColorizeEnd[0] * 255); - end_green = (int)(colormap->ColorizeEnd[1] * 255); - end_blue = (int)(colormap->ColorizeEnd[2] * 255); -} + ApplySpecialColormapRGBACommand::ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen) + { + buffer = screen->GetBuffer(); + pitch = screen->GetPitch(); + width = screen->GetWidth(); + height = screen->GetHeight(); + + start_red = (int)(colormap->ColorizeStart[0] * 255); + start_green = (int)(colormap->ColorizeStart[1] * 255); + start_blue = (int)(colormap->ColorizeStart[2] * 255); + end_red = (int)(colormap->ColorizeEnd[0] * 255); + end_green = (int)(colormap->ColorizeEnd[1] * 255); + end_blue = (int)(colormap->ColorizeEnd[2] * 255); + } #ifdef NO_SSE -void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) -{ - int y = thread->skipped_by_thread(0); - int count = thread->count_for_thread(0, height); - while (count > 0) + void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { - BYTE *pixels = buffer + y * pitch * 4; - for (int x = 0; x < width; x++) + int y = thread->skipped_by_thread(0); + int count = 
thread->count_for_thread(0, height); + while (count > 0) { - int fg_red = pixels[2]; - int fg_green = pixels[1]; - int fg_blue = pixels[0]; + uint8_t *pixels = buffer + y * pitch * 4; + for (int x = 0; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; - int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; - gray += (gray >> 7); // gray*=256/255 - int inv_gray = 256 - gray; + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; - int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); - int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); - int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); - pixels[0] = (BYTE)blue; - pixels[1] = (BYTE)green; - pixels[2] = (BYTE)red; - pixels[3] = 0xff; + pixels[0] = (uint8_t)blue; + pixels[1] = (uint8_t)green; + pixels[2] = (uint8_t)red; + pixels[3] = 0xff; - pixels += 4; + pixels += 4; + } + y += thread->num_cores; + count--; } - y += thread->num_cores; - count--; } -} #else -void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) -{ - int y = thread->skipped_by_thread(0); - int count = thread->count_for_thread(0, height); - __m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); - __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); - while (count > 0) + void ApplySpecialColormapRGBACommand::Execute(DrawerThread *thread) { - BYTE *pixels = buffer + y * pitch * 4; - int sse_length = width / 4; - for (int x = 0; x < sse_length; x++) + int y = thread->skipped_by_thread(0); + int count = thread->count_for_thread(0, height); + 
__m128i gray_weight = _mm_set_epi16(256, 77, 143, 37, 256, 77, 143, 37); + __m128i start_end = _mm_set_epi16(255, start_red, start_green, start_blue, 255, end_red, end_green, end_blue); + while (count > 0) { - // Unpack to integers: - __m128i p = _mm_loadu_si128((const __m128i*)pixels); + uint8_t *pixels = buffer + y * pitch * 4; + int sse_length = width / 4; + for (int x = 0; x < sse_length; x++) + { + // Unpack to integers: + __m128i p = _mm_loadu_si128((const __m128i*)pixels); - __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); - __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); + __m128i p16_0 = _mm_unpacklo_epi8(p, _mm_setzero_si128()); + __m128i p16_1 = _mm_unpackhi_epi8(p, _mm_setzero_si128()); - // Add gray weighting to colors - __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); - __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); - __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); - __m128i p32_1 = _mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); - __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); - __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); + // Add gray weighting to colors + __m128i mullo0 = _mm_mullo_epi16(p16_0, gray_weight); + __m128i mullo1 = _mm_mullo_epi16(p16_1, gray_weight); + __m128i p32_0 = _mm_unpacklo_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_1 = _mm_unpackhi_epi16(mullo0, _mm_setzero_si128()); + __m128i p32_2 = _mm_unpacklo_epi16(mullo1, _mm_setzero_si128()); + __m128i p32_3 = _mm_unpackhi_epi16(mullo1, _mm_setzero_si128()); - // Transpose to get color components in individual vectors: - __m128 tmpx = _mm_castsi128_ps(p32_0); - __m128 tmpy = _mm_castsi128_ps(p32_1); - __m128 tmpz = _mm_castsi128_ps(p32_2); - __m128 tmpw = _mm_castsi128_ps(p32_3); - _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); - __m128i blue = _mm_castps_si128(tmpx); - __m128i green = _mm_castps_si128(tmpy); - __m128i red = _mm_castps_si128(tmpz); - __m128i alpha = _mm_castps_si128(tmpw); + // 
Transpose to get color components in individual vectors: + __m128 tmpx = _mm_castsi128_ps(p32_0); + __m128 tmpy = _mm_castsi128_ps(p32_1); + __m128 tmpz = _mm_castsi128_ps(p32_2); + __m128 tmpw = _mm_castsi128_ps(p32_3); + _MM_TRANSPOSE4_PS(tmpx, tmpy, tmpz, tmpw); + __m128i blue = _mm_castps_si128(tmpx); + __m128i green = _mm_castps_si128(tmpy); + __m128i red = _mm_castps_si128(tmpz); + __m128i alpha = _mm_castps_si128(tmpw); - // Calculate gray and 256-gray values: - __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); - __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); + // Calculate gray and 256-gray values: + __m128i gray = _mm_srli_epi32(_mm_add_epi32(_mm_add_epi32(red, green), blue), 8); + __m128i inv_gray = _mm_sub_epi32(_mm_set1_epi32(256), gray); - // p32 = start * inv_gray + end * gray: - __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); - __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); - __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); - __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); - __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); - __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); - __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); - __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); - __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); - __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); - __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); - __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); - __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); - __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); - __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); - __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); - __m128i gray16_1_mulhi 
= _mm_mulhi_epi16(gray16_1, start_end); - __m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); - __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); - p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); - p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); - p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); - p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); + // p32 = start * inv_gray + end * gray: + __m128i gray0 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i gray1 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i gray2 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i gray3 = _mm_shuffle_epi32(gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i inv_gray0 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(0, 0, 0, 0)); + __m128i inv_gray1 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(1, 1, 1, 1)); + __m128i inv_gray2 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(2, 2, 2, 2)); + __m128i inv_gray3 = _mm_shuffle_epi32(inv_gray, _MM_SHUFFLE(3, 3, 3, 3)); + __m128i gray16_0 = _mm_packs_epi32(gray0, inv_gray0); + __m128i gray16_1 = _mm_packs_epi32(gray1, inv_gray1); + __m128i gray16_2 = _mm_packs_epi32(gray2, inv_gray2); + __m128i gray16_3 = _mm_packs_epi32(gray3, inv_gray3); + __m128i gray16_0_mullo = _mm_mullo_epi16(gray16_0, start_end); + __m128i gray16_1_mullo = _mm_mullo_epi16(gray16_1, start_end); + __m128i gray16_2_mullo = _mm_mullo_epi16(gray16_2, start_end); + __m128i gray16_3_mullo = _mm_mullo_epi16(gray16_3, start_end); + __m128i gray16_0_mulhi = _mm_mulhi_epi16(gray16_0, start_end); + __m128i gray16_1_mulhi = _mm_mulhi_epi16(gray16_1, start_end); + 
__m128i gray16_2_mulhi = _mm_mulhi_epi16(gray16_2, start_end); + __m128i gray16_3_mulhi = _mm_mulhi_epi16(gray16_3, start_end); + p32_0 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_0_mullo, gray16_0_mulhi), _mm_unpackhi_epi16(gray16_0_mullo, gray16_0_mulhi)), 8); + p32_1 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_1_mullo, gray16_1_mulhi), _mm_unpackhi_epi16(gray16_1_mullo, gray16_1_mulhi)), 8); + p32_2 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_2_mullo, gray16_2_mulhi), _mm_unpackhi_epi16(gray16_2_mullo, gray16_2_mulhi)), 8); + p32_3 = _mm_srli_epi32(_mm_add_epi32(_mm_unpacklo_epi16(gray16_3_mullo, gray16_3_mulhi), _mm_unpackhi_epi16(gray16_3_mullo, gray16_3_mulhi)), 8); - p16_0 = _mm_packs_epi32(p32_0, p32_1); - p16_1 = _mm_packs_epi32(p32_2, p32_3); - p = _mm_packus_epi16(p16_0, p16_1); + p16_0 = _mm_packs_epi32(p32_0, p32_1); + p16_1 = _mm_packs_epi32(p32_2, p32_3); + p = _mm_packus_epi16(p16_0, p16_1); - _mm_storeu_si128((__m128i*)pixels, p); - pixels += 16; + _mm_storeu_si128((__m128i*)pixels, p); + pixels += 16; + } + + for (int x = sse_length * 4; x < width; x++) + { + int fg_red = pixels[2]; + int fg_green = pixels[1]; + int fg_blue = pixels[0]; + + int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; + gray += (gray >> 7); // gray*=256/255 + int inv_gray = 256 - gray; + + int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); + int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); + int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); + + pixels[0] = (uint8_t)blue; + pixels[1] = (uint8_t)green; + pixels[2] = (uint8_t)red; + pixels[3] = 0xff; + + pixels += 4; + } + + y += thread->num_cores; + count--; } - - for (int x = sse_length * 4; x < width; x++) - { - int fg_red = pixels[2]; - int fg_green = pixels[1]; - int fg_blue = pixels[0]; - - int gray = (fg_red * 77 + fg_green * 143 + fg_blue * 37) >> 8; - gray += (gray >> 7); // gray*=256/255 - int 
inv_gray = 256 - gray; - - int red = clamp((start_red * inv_gray + end_red * gray) >> 8, 0, 255); - int green = clamp((start_green * inv_gray + end_green * gray) >> 8, 0, 255); - int blue = clamp((start_blue * inv_gray + end_blue * gray) >> 8, 0, 255); - - pixels[0] = (BYTE)blue; - pixels[1] = (BYTE)green; - pixels[2] = (BYTE)red; - pixels[3] = 0xff; - - pixels += 4; - } - - y += thread->num_cores; - count--; } -} #endif -///////////////////////////////////////////////////////////////////////////// - -void R_DrawSingleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawSingleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawDoubleSkyCol1_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawDoubleSkyCol4_rgba(uint32_t solid_top, uint32_t solid_bottom) -{ - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); -} - -void R_DrawColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillAddColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillAddClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillRevSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawFuzzColumn_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - - dc_yl = MAX(dc_yl, 1); - dc_yh = MIN(dc_yh, fuzzviewheight); - if (dc_yl <= dc_yh) - fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; -} - -void R_DrawAddColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTlatedAddColumn_rgba() -{ - 
DrawerCommandQueue::QueueCommand(); -} - -void R_DrawShadedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawAddClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawAddClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSubClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawRevSubClampColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawRevSubClampTranslatedColumn_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpan_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMasked_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanTranslucent_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMaskedTranslucent_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanAddClamp_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawSpanMaskedAddClamp_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_FillSpan_rgba() -{ - DrawerCommandQueue::QueueCommand(); -} - -void R_DrawTiltedSpan_rgba(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) -{ - DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); -} - -void R_DrawColoredSpan_rgba(int y, int x1, int x2) -{ - DrawerCommandQueue::QueueCommand(y, x1, x2); -} - -static ShadeConstants slab_rgba_shade_constants; -static const BYTE *slab_rgba_colormap; -static fixed_t slab_rgba_light; - -void R_SetupDrawSlab_rgba(FSWColormap *base_colormap, float light, int shade) -{ - slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; - slab_rgba_shade_constants.light_green = base_colormap->Color.g * 
256 / 255; - slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; - slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; - slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; - slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; - slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; - slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; - slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; - slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); - slab_rgba_colormap = base_colormap->Maps; - slab_rgba_light = LIGHTSCALE(light, shade); -} - -void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BYTE *p) -{ - DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); -} - -DWORD vlinec1_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void vlinec4_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -DWORD mvlinec1_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void mvlinec4_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_add_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_add_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_addclamp_rgba() -{ - using namespace 
drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_addclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_subclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_subclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -fixed_t tmvline1_revsubclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - return dc_texturefrac + dc_count * dc_iscale; -} - -void tmvline4_revsubclamp_rgba() -{ - using namespace drawerargs; - - DrawerCommandQueue::QueueCommand(); - for (int i = 0; i < 4; i++) - vplce[i] += vince[i] * dc_count; -} - -void R_DrawFogBoundarySection_rgba(int y, int y2, int x1) -{ - for (; y < y2; ++y) - { - int x2 = spanend[y]; - DrawerCommandQueue::QueueCommand(y, x1, x2); - } -} - -void R_DrawFogBoundary_rgba(int x1, int x2, short *uclip, short *dclip) -{ - // To do: we do not need to create new spans when using rgba output - instead we should calculate light on a per pixel basis - - // This is essentially the same as R_MapVisPlane but with an extra step - // to create new horizontal spans whenever the light changes enough that - // we need to use a new colormap. 
- - double lightstep = rw_lightstep; - double light = rw_light + rw_lightstep*(x2 - x1 - 1); - int x = x2 - 1; - int t2 = uclip[x]; - int b2 = dclip[x]; - int rcolormap = GETPALOOKUP(light, wallshade); - int lcolormap; - BYTE *basecolormapdata = basecolormap->Maps; - - if (b2 > t2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - - R_SetColorMapLight(basecolormap, (float)light, wallshade); - - BYTE *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - - for (--x; x >= x1; --x) - { - int t1 = uclip[x]; - int b1 = dclip[x]; - const int xr = x + 1; - int stop; - - light -= rw_lightstep; - lcolormap = GETPALOOKUP(light, wallshade); - if (lcolormap != rcolormap) - { - if (t2 < b2 && rcolormap != 0) - { // Colormap 0 is always the identity map, so rendering it is - // just a waste of time. - R_DrawFogBoundarySection_rgba(t2, b2, xr); - } - if (t1 < t2) t2 = t1; - if (b1 > b2) b2 = b1; - if (t2 < b2) - { - clearbufshort(spanend + t2, b2 - t2, x); - } - rcolormap = lcolormap; - R_SetColorMapLight(basecolormap, (float)light, wallshade); - fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); - } - else - { - if (fake_dc_colormap != basecolormapdata) - { - stop = MIN(t1, b2); - while (t2 < stop) - { - int y = t2++; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - stop = MAX(b1, t2); - while (b2 > stop) - { - int y = --b2; - DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); - } - } - else - { - t2 = MAX(t2, MIN(t1, b2)); - b2 = MIN(b2, MAX(b1, t2)); - } - - stop = MIN(t2, b1); - while (t1 < stop) - { - spanend[t1++] = x; - } - stop = MAX(b2, t2); - while (b1 > stop) - { - spanend[--b1] = x; - } - } - - t2 = uclip[x]; - b2 = dclip[x]; - } - if (t2 < b2 && rcolormap != 0) - { - R_DrawFogBoundarySection_rgba(t2, b2, x1); - } -} - } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index 5d159164ef..0790740689 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -1,31 +1,31 @@ -// 
Emacs style mode select -*- C++ -*- -//----------------------------------------------------------------------------- -// -// $Id:$ -// -// Copyright (C) 1993-1996 by id Software, Inc. -// -// This source is available for distribution and/or modification -// only under the terms of the DOOM Source Code License as -// published by id Software. All rights reserved. -// -// The source is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// FITNESS FOR A PARTICULAR PURPOSE. See the DOOM Source Code License -// for more details. -// -// DESCRIPTION: -// System specific interface stuff. -// -//----------------------------------------------------------------------------- +/* +** Drawer commands for the RT family of drawers +** Copyright (c) 2016 Magnus Norddahl +** +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. +** +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: +** +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. 
+** +*/ - -#ifndef __R_DRAW_RGBA__ -#define __R_DRAW_RGBA__ +#pragma once #include "r_draw.h" #include "v_palette.h" #include "r_thread.h" +#include "r_drawers.h" #ifndef NO_SSE #include @@ -38,173 +38,511 @@ EXTERN_CVAR(Float, r_lod_bias) namespace swrenderer { + // Give the compiler a strong hint we want these functions inlined: + #ifndef FORCEINLINE + #if defined(_MSC_VER) + #define FORCEINLINE __forceinline + #elif defined(__GNUC__) + #define FORCEINLINE __attribute__((always_inline)) inline + #else + #define FORCEINLINE inline + #endif + #endif -///////////////////////////////////////////////////////////////////////////// -// Drawer commands: + // Promise compiler we have no aliasing of this pointer + #ifndef RESTRICT + #if defined(_MSC_VER) + #define RESTRICT __restrict + #elif defined(__GNUC__) + #define RESTRICT __restrict__ + #else + #define RESTRICT + #endif + #endif -class ApplySpecialColormapRGBACommand : public DrawerCommand -{ - BYTE *buffer; - int pitch; - int width; - int height; - int start_red; - int start_green; - int start_blue; - int end_red; - int end_green; - int end_blue; + #define DECLARE_DRAW_COMMAND(name, func, base) \ + class name##LLVMCommand : public base \ + { \ + public: \ + using base::base; \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + Drawers::Instance()->func(&args, &d); \ + } \ + }; -public: - ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "ApplySpecialColormapRGBACommand"; } -}; - -template -class DrawerBlendCommand : public CommandType -{ -public: - void Execute(DrawerThread *thread) override + class DrawSpanLLVMCommand : public DrawerCommand { - typename CommandType::LoopIterator loop(this, thread); - if (!loop) return; - BlendMode blend(*this, loop); - do + public: + DrawSpanLLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString 
DebugInfo() override; + + protected: + DrawSpanArgs args; + + private: + inline static bool sampler_setup(const uint32_t * &source, int &xbits, int &ybits, bool mipmapped); + }; + + class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand + { + public: + void Execute(DrawerThread *thread) override; + }; + + class DrawWall4LLVMCommand : public DrawerCommand + { + protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawWall4LLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawWall1LLVMCommand : public DrawerCommand + { + protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawWall1LLVMCommand(); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawColumnLLVMCommand : public DrawerCommand + { + protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + FString DebugInfo() override; + + public: + DrawColumnLLVMCommand(); + + void Execute(DrawerThread *thread) override; + }; + + class DrawSkyLLVMCommand : public DrawerCommand + { + protected: + DrawSkyArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread); + + public: + DrawSkyLLVMCommand(uint32_t solid_top, uint32_t solid_bottom); + FString DebugInfo() override; + }; + + DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, 
DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, 
DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); + DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); + DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); + + class DrawFuzzColumnRGBACommand : public DrawerCommand + { + int _x; + int _yl; + int _yh; + uint8_t * RESTRICT _destorg; + int _pitch; + int _fuzzpos; + int _fuzzviewheight; + + public: + DrawFuzzColumnRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillSpanRGBACommand : public DrawerCommand + { + int _x1; + int _x2; + int _y; + uint8_t * RESTRICT _destorg; + fixed_t _light; + int _color; + + public: + FillSpanRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawSlabRGBACommand : public DrawerCommand + { + int _dx; + fixed_t _v; + int _dy; + fixed_t _vi; + const uint8_t *_voxelptr; + uint32_t *_p; + ShadeConstants _shade_constants; + const uint8_t *_colormap; + fixed_t _light; + int _pitch; + int _start_y; + + public: + DrawSlabRGBACommand(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p, ShadeConstants shade_constants, const uint8_t *colormap, fixed_t light); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawFogBoundaryLineRGBACommand : public DrawerCommand + { + int _y; + int _x; + int _x2; + uint8_t * RESTRICT _destorg; + fixed_t _light; + ShadeConstants _shade_constants; + + public: + DrawFogBoundaryLineRGBACommand(int y, int x, int x2); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawTiltedSpanRGBACommand : public DrawerCommand + { + int _x1; + int _x2; + int _y; + uint8_t * RESTRICT _destorg; + fixed_t _light; + 
ShadeConstants _shade_constants; + FVector3 _plane_sz; + FVector3 _plane_su; + FVector3 _plane_sv; + bool _plane_shade; + int _planeshade; + float _planelightfloat; + fixed_t _pviewx; + fixed_t _pviewy; + int _xbits; + int _ybits; + const uint32_t * RESTRICT _source; + + public: + DrawTiltedSpanRGBACommand(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class DrawColoredSpanRGBACommand : public DrawerCommand + { + int _y; + int _x1; + int _x2; + uint8_t * RESTRICT _destorg; + fixed_t _light; + int _color; + + public: + DrawColoredSpanRGBACommand(int y, int x1, int x2); + + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillTransColumnRGBACommand : public DrawerCommand + { + int _x; + int _y1; + int _y2; + int _color; + int _a; + uint8_t * RESTRICT _destorg; + int _pitch; + fixed_t _light; + + public: + FillTransColumnRGBACommand(int x, int y1, int y2, int color, int a); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class ApplySpecialColormapRGBACommand : public DrawerCommand + { + uint8_t *buffer; + int pitch; + int width; + int height; + int start_red; + int start_green; + int start_blue; + int end_red; + int end_green; + int end_blue; + + public: + ApplySpecialColormapRGBACommand(FSpecialColormap *colormap, DFrameBuffer *screen); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override { return "ApplySpecialColormapRGBACommand"; } + }; + + template + class DrawerBlendCommand : public CommandType + { + public: + void Execute(DrawerThread *thread) override { - blend.Blend(*this, loop); - } while (loop.next()); - } -}; + typename CommandType::LoopIterator loop(this, thread); + if (!loop) return; + BlendMode blend(*this, loop); + do + 
{ + blend.Blend(*this, loop); + } while (loop.next()); + } + }; -///////////////////////////////////////////////////////////////////////////// -// Pixel shading inline functions: - -// Give the compiler a strong hint we want these functions inlined: -#ifndef FORCEINLINE -#if defined(_MSC_VER) -#define FORCEINLINE __forceinline -#elif defined(__GNUC__) -#define FORCEINLINE __attribute__((always_inline)) inline -#else -#define FORCEINLINE inline -#endif -#endif - -// Promise compiler we have no aliasing of this pointer -#ifndef RESTRICT -#if defined(_MSC_VER) -#define RESTRICT __restrict -#elif defined(__GNUC__) -#define RESTRICT __restrict__ -#else -#define RESTRICT -#endif -#endif - -class LightBgra -{ -public: - // calculates the light constant passed to the shade_pal_index function - FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light) + class DrawColumnRt1LLVMCommand : public DrawerCommand { - return 256 - (light >> (FRACBITS - 8)); - } + protected: + DrawColumnArgs args; + WorkerThreadData ThreadData(DrawerThread *thread); - // Calculates a ARGB8 color for the given palette index and light multiplier - FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + public: + DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, 
DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); + DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); + + ///////////////////////////////////////////////////////////////////////////// + + class RtInitColsRGBACommand : public DrawerCommand { - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; + BYTE * RESTRICT buff; - red 
= red * light / 256; - green = green * light / 256; - blue = blue * light / 256; + public: + RtInitColsRGBACommand(BYTE *buff); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap - FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) + template + class DrawColumnHorizRGBACommand : public DrawerCommand { - const PalEntry &color = GPalette.BaseColors[index]; - uint32_t alpha = color.d & 0xff000000; - uint32_t red = color.r; - uint32_t green = color.g; - uint32_t blue = color.b; - if (constants.simple_shade) + int _count; + fixed_t _iscale; + fixed_t _texturefrac; + const InputPixelType * RESTRICT _source; + int _x; + int _yl; + int _yh; + + public: + DrawColumnHorizRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + class FillColumnHorizRGBACommand : public DrawerCommand + { + int _x; + int _yl; + int _yh; + int _count; + uint32_t _color; + + public: + FillColumnHorizRGBACommand(); + void Execute(DrawerThread *thread) override; + FString DebugInfo() override; + }; + + ///////////////////////////////////////////////////////////////////////////// + // Pixel shading inline functions: + + class LightBgra + { + public: + // calculates the light constant passed to the shade_pal_index function + FORCEINLINE static uint32_t calc_light_multiplier(dsfixed_t light) { + return 256 - (light >> (FRACBITS - 8)); + } + + // Calculates a ARGB8 color for the given palette index and light multiplier + FORCEINLINE static uint32_t shade_pal_index_simple(uint32_t index, uint32_t light) + { + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + red = red * light / 256; green = green * light / 256; blue = blue * 
light / 256; + + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + // Calculates a ARGB8 color for the given palette index, light multiplier and dynamic colormap + FORCEINLINE static uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; + const PalEntry &color = GPalette.BaseColors[index]; + uint32_t alpha = color.d & 0xff000000; + uint32_t red = color.r; + uint32_t green = color.g; + uint32_t blue = color.b; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; + red = (red * inv_desaturate + intensity) / 256; + green = (green * inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | 
blue; } - return alpha | (red << 16) | (green << 8) | blue; - } - FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) - { - uint32_t red = RPART(color) * light / 256; - uint32_t green = GPART(color) * light / 256; - uint32_t blue = BPART(color) * light / 256; - return 0xff000000 | (red << 16) | (green << 8) | blue; - } - - FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) - { - uint32_t alpha = color & 0xff000000; - uint32_t red = (color >> 16) & 0xff; - uint32_t green = (color >> 8) & 0xff; - uint32_t blue = color & 0xff; - if (constants.simple_shade) + FORCEINLINE static uint32_t shade_bgra_simple(uint32_t color, uint32_t light) { - red = red * light / 256; - green = green * light / 256; - blue = blue * light / 256; + uint32_t red = RPART(color) * light / 256; + uint32_t green = GPART(color) * light / 256; + uint32_t blue = BPART(color) * light / 256; + return 0xff000000 | (red << 16) | (green << 8) | blue; } - else + + FORCEINLINE static uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) { - uint32_t inv_light = 256 - light; - uint32_t inv_desaturate = 256 - constants.desaturate; + uint32_t alpha = color & 0xff000000; + uint32_t red = (color >> 16) & 0xff; + uint32_t green = (color >> 8) & 0xff; + uint32_t blue = color & 0xff; + if (constants.simple_shade) + { + red = red * light / 256; + green = green * light / 256; + blue = blue * light / 256; + } + else + { + uint32_t inv_light = 256 - light; + uint32_t inv_desaturate = 256 - constants.desaturate; - uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; + uint32_t intensity = ((red * 77 + green * 143 + blue * 37) >> 8) * constants.desaturate; - red = (red * inv_desaturate + intensity) / 256; - green = (green * inv_desaturate + intensity) / 256; - blue = (blue * inv_desaturate + intensity) / 256; + red = (red * inv_desaturate + intensity) / 256; + green = (green * 
inv_desaturate + intensity) / 256; + blue = (blue * inv_desaturate + intensity) / 256; - red = (constants.fade_red * inv_light + red * light) / 256; - green = (constants.fade_green * inv_light + green * light) / 256; - blue = (constants.fade_blue * inv_light + blue * light) / 256; + red = (constants.fade_red * inv_light + red * light) / 256; + green = (constants.fade_green * inv_light + green * light) / 256; + blue = (constants.fade_blue * inv_light + blue * light) / 256; - red = (red * constants.light_red) / 256; - green = (green * constants.light_green) / 256; - blue = (blue * constants.light_blue) / 256; + red = (red * constants.light_red) / 256; + green = (green * constants.light_green) / 256; + blue = (blue * constants.light_blue) / 256; + } + return alpha | (red << 16) | (green << 8) | blue; } - return alpha | (red << 16) | (green << 8) | blue; - } -}; - + }; } - -#endif diff --git a/src/r_draw_tc.cpp b/src/r_draw_tc.cpp new file mode 100644 index 0000000000..a531d4aa9c --- /dev/null +++ b/src/r_draw_tc.cpp @@ -0,0 +1,1411 @@ + +#include + +#include "templates.h" +#include "doomdef.h" +#include "i_system.h" +#include "w_wad.h" +#include "r_local.h" +#include "v_video.h" +#include "doomstat.h" +#include "st_stuff.h" +#include "g_game.h" +#include "g_level.h" +#include "r_data/r_translate.h" +#include "v_palette.h" +#include "r_data/colormaps.h" +#include "r_plane.h" +#include "r_draw_tc.h" +#include "r_draw_rgba.h" +#include "r_thread.h" + +namespace swrenderer +{ + // Needed by R_DrawFogBoundary (which probably shouldn't be part of this file) + extern "C" short spanend[MAXHEIGHT]; + extern float rw_light; + extern float rw_lightstep; + extern int wallshade; + + double dc_texturemid; + + int ylookup[MAXHEIGHT]; + uint8_t shadetables[NUMCOLORMAPS * 16 * 256]; + FDynamicColormap ShadeFakeColormap[16]; + uint8_t identitymap[256]; + FDynamicColormap identitycolormap; + int fuzzoffset[FUZZTABLE + 1]; + int fuzzpos; + int fuzzviewheight; + + namespace drawerargs + { 
+ int dc_pitch; + lighttable_t *dc_colormap; + FSWColormap *dc_fcolormap; + ShadeConstants dc_shade_constants; + fixed_t dc_light; + int dc_x; + int dc_yl; + int dc_yh; + fixed_t dc_iscale; + fixed_t dc_texturefrac; + uint32_t dc_textureheight; + int dc_color; + uint32_t dc_srccolor; + uint32_t dc_srccolor_bgra; + uint32_t *dc_srcblend; + uint32_t *dc_destblend; + fixed_t dc_srcalpha; + fixed_t dc_destalpha; + const uint8_t *dc_source; + const uint8_t *dc_source2; + uint32_t dc_texturefracx; + uint8_t *dc_translation; + uint8_t *dc_dest; + uint8_t *dc_destorg; + int dc_destheight; + int dc_count; + uint32_t vplce[4]; + uint32_t vince[4]; + uint8_t *palookupoffse[4]; + fixed_t palookuplight[4]; + const uint8_t *bufplce[4]; + const uint8_t *bufplce2[4]; + uint32_t buftexturefracx[4]; + uint32_t bufheight[4]; + int vlinebits; + int mvlinebits; + int tmvlinebits; + int ds_y; + int ds_x1; + int ds_x2; + lighttable_t * ds_colormap; + FSWColormap *ds_fcolormap; + ShadeConstants ds_shade_constants; + dsfixed_t ds_light; + dsfixed_t ds_xfrac; + dsfixed_t ds_yfrac; + dsfixed_t ds_xstep; + dsfixed_t ds_ystep; + int ds_xbits; + int ds_ybits; + fixed_t ds_alpha; + double ds_lod; + const uint8_t *ds_source; + bool ds_source_mipmapped; + int ds_color; + bool drawer_needs_pal_input; + unsigned int dc_tspans[4][MAXHEIGHT]; + unsigned int *dc_ctspan[4]; + unsigned int *horizspan[4]; + } + + void R_InitColumnDrawers() + { + colfunc = basecolfunc = R_DrawColumn; + fuzzcolfunc = R_DrawFuzzColumn; + transcolfunc = R_DrawTranslatedColumn; + spanfunc = R_DrawSpan; + hcolfunc_pre = R_DrawColumnHoriz; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + + void R_InitShadeMaps() + { + int i, j; + // set up shading tables for shaded columns + // 16 colormap sets, progressing from full alpha to minimum visible alpha + + uint8_t *table = shadetables; + + // Full alpha + for (i = 0; i < 16; ++i) + { + ShadeFakeColormap[i].Color = ~0u; + ShadeFakeColormap[i].Desaturate = ~0u; + 
ShadeFakeColormap[i].Next = NULL; + ShadeFakeColormap[i].Maps = table; + + for (j = 0; j < NUMCOLORMAPS; ++j) + { + int a = (NUMCOLORMAPS - j) * 256 / NUMCOLORMAPS * (16 - i); + for (int k = 0; k < 256; ++k) + { + uint8_t v = (((k + 2) * a) + 256) >> 14; + table[k] = MIN(v, 64); + } + table += 256; + } + } + for (i = 0; i < NUMCOLORMAPS * 16 * 256; ++i) + { + assert(shadetables[i] <= 64); + } + + // Set up a guaranteed identity map + for (i = 0; i < 256; ++i) + { + identitymap[i] = i; + } + } + + void R_InitFuzzTable(int fuzzoff) + { + /* + FUZZOFF,-FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF, + FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF, + FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF, + FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF,FUZZOFF,-FUZZOFF,FUZZOFF + */ + + static const int8_t fuzzinit[FUZZTABLE] = { + 1,-1, 1,-1, 1, 1,-1, + 1, 1,-1, 1, 1, 1,-1, + 1, 1, 1,-1,-1,-1,-1, + 1,-1,-1, 1, 1, 1, 1,-1, + 1,-1, 1, 1,-1,-1, 1, + 1,-1,-1,-1,-1, 1, 1, + 1, 1,-1, 1, 1,-1, 1 + }; + + for (int i = 0; i < FUZZTABLE; i++) + { + fuzzoffset[i] = fuzzinit[i] * fuzzoff; + } + } + + namespace + { + bool R_SetBlendFunc(int op, fixed_t fglevel, fixed_t bglevel, int flags) + { + using namespace drawerargs; + + // r_drawtrans is a seriously bad thing to turn off. I wonder if I should + // just remove it completely. 
+ if (!r_drawtrans || (op == STYLEOP_Add && fglevel == FRACUNIT && bglevel == 0 && !(flags & STYLEF_InvertSource))) + { + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillColumn; + hcolfunc_post1 = rt_copy1col; + hcolfunc_post4 = rt_copy4cols; + } + else if (dc_translation == NULL) + { + colfunc = basecolfunc; + hcolfunc_post1 = rt_map1col; + hcolfunc_post4 = rt_map4cols; + } + else + { + colfunc = transcolfunc; + hcolfunc_post1 = rt_tlate1col; + hcolfunc_post4 = rt_tlate4cols; + drawer_needs_pal_input = true; + } + return true; + } + if (flags & STYLEF_InvertSource) + { + dc_srcblend = Col2RGB8_Inverse[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else if (op == STYLEOP_Add && fglevel + bglevel <= FRACUNIT) + { + dc_srcblend = Col2RGB8[fglevel >> 10]; + dc_destblend = Col2RGB8[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + else + { + dc_srcblend = Col2RGB8_LessPrecision[fglevel >> 10]; + dc_destblend = Col2RGB8_LessPrecision[bglevel >> 10]; + dc_srcalpha = fglevel; + dc_destalpha = bglevel; + } + switch (op) + { + case STYLEOP_Add: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (fglevel + bglevel <= FRACUNIT) + { // Colors won't overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawAddColumn; + hcolfunc_post1 = rt_add1col; + hcolfunc_post4 = rt_add4cols; + } + else + { + colfunc = R_DrawTlatedAddColumn; + hcolfunc_post1 = rt_tlateadd1col; + hcolfunc_post4 = rt_tlateadd4cols; + drawer_needs_pal_input = true; + } + } + else + { // Colors might overflow when added + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = 
R_DrawAddClampColumn; + hcolfunc_post1 = rt_addclamp1col; + hcolfunc_post4 = rt_addclamp4cols; + } + else + { + colfunc = R_DrawAddClampTranslatedColumn; + hcolfunc_post1 = rt_tlateaddclamp1col; + hcolfunc_post4 = rt_tlateaddclamp4cols; + drawer_needs_pal_input = true; + } + } + return true; + + case STYLEOP_Sub: + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else + { + colfunc = R_DrawSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlatesubclamp1col; + hcolfunc_post4 = rt_tlatesubclamp4cols; + drawer_needs_pal_input = true; + } + return true; + + case STYLEOP_RevSub: + if (fglevel == 0 && bglevel == FRACUNIT) + { + return false; + } + if (flags & STYLEF_ColorIsFixed) + { + colfunc = R_FillRevSubClampColumn; + hcolfunc_post1 = rt_subclamp1col; + hcolfunc_post4 = rt_subclamp4cols; + } + else if (dc_translation == NULL) + { + colfunc = R_DrawRevSubClampColumn; + hcolfunc_post1 = rt_revsubclamp1col; + hcolfunc_post4 = rt_revsubclamp4cols; + } + else + { + colfunc = R_DrawRevSubClampTranslatedColumn; + hcolfunc_post1 = rt_tlaterevsubclamp1col; + hcolfunc_post4 = rt_tlaterevsubclamp4cols; + drawer_needs_pal_input = true; + } + return true; + + default: + return false; + } + } + + fixed_t GetAlpha(int type, fixed_t alpha) + { + switch (type) + { + case STYLEALPHA_Zero: return 0; + case STYLEALPHA_One: return OPAQUE; + case STYLEALPHA_Src: return alpha; + case STYLEALPHA_InvSrc: return OPAQUE - alpha; + default: return 0; + } + } + + FDynamicColormap *basecolormapsave; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color) + { + using namespace drawerargs; + + fixed_t fglevel, bglevel; + + drawer_needs_pal_input = false; + + style.CheckFuzz(); + + if (style.BlendOp == 
STYLEOP_Shadow) + { + style = LegacyRenderStyles[STYLE_TranslucentStencil]; + alpha = TRANSLUC33; + color = 0; + } + + if (style.Flags & STYLEF_TransSoulsAlpha) + { + alpha = fixed_t(transsouls * OPAQUE); + } + else if (style.Flags & STYLEF_Alpha1) + { + alpha = FRACUNIT; + } + else + { + alpha = clamp(alpha, 0, OPAQUE); + } + + if (translation != -1) + { + dc_translation = NULL; + if (translation != 0) + { + FRemapTable *table = TranslationToTable(translation); + if (table != NULL && !table->Inactive) + { + if (r_swtruecolor) + dc_translation = (uint8_t*)table->Palette; + else + dc_translation = table->Remap; + } + } + } + basecolormapsave = basecolormap; + hcolfunc_pre = R_DrawColumnHoriz; + + // Check for special modes + if (style.BlendOp == STYLEOP_Fuzz) + { + colfunc = fuzzcolfunc; + return DoDraw0; + } + else if (style == LegacyRenderStyles[STYLE_Shaded]) + { + // Shaded drawer only gets 16 levels of alpha because it saves memory. + if ((alpha >>= 12) == 0) + return DontDraw; + colfunc = R_DrawShadedColumn; + hcolfunc_post1 = rt_shaded1col; + hcolfunc_post4 = rt_shaded4cols; + drawer_needs_pal_input = true; + dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; + basecolormap = &ShadeFakeColormap[16 - alpha]; + if (fixedlightlev >= 0 && fixedcolormap == NULL) + { + R_SetColorMapLight(basecolormap, 0, FIXEDLIGHT2SHADE(fixedlightlev)); + } + else + { + R_SetColorMapLight(basecolormap, 0, 0); + } + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; + } + + fglevel = GetAlpha(style.SrcAlpha, alpha); + bglevel = GetAlpha(style.DestAlpha, alpha); + + if (style.Flags & STYLEF_ColorIsFixed) + { + uint32_t x = fglevel >> 10; + uint32_t r = RPART(color); + uint32_t g = GPART(color); + uint32_t b = BPART(color); + // dc_color is used by the rt_* routines. It is indexed into dc_srcblend. 
+ dc_color = RGB32k.RGB[r >> 3][g >> 3][b >> 3]; + if (style.Flags & STYLEF_InvertSource) + { + r = 255 - r; + g = 255 - g; + b = 255 - b; + } + uint32_t alpha = clamp(fglevel >> (FRACBITS - 8), 0, 255); + dc_srccolor_bgra = (alpha << 24) | (r << 16) | (g << 8) | b; + // dc_srccolor is used by the R_Fill* routines. It is premultiplied + // with the alpha. + dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; + hcolfunc_pre = R_FillColumnHoriz; + R_SetColorMapLight(&identitycolormap, 0, 0); + } + + if (!R_SetBlendFunc(style.BlendOp, fglevel, bglevel, style.Flags)) + { + return DontDraw; + } + bool active_columnmethod = r_columnmethod && !r_swtruecolor; + return active_columnmethod ? DoDraw1 : DoDraw0; + } + + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color) + { + return R_SetPatchStyle(style, FLOAT2FIXED(alpha), translation, color); + } + + void R_FinishSetPatchStyle() + { + basecolormap = basecolormapsave; + } + + const uint8_t *R_GetColumn(FTexture *tex, int col) + { + int width; + + // If the texture's width isn't a power of 2, then we need to make it a + // positive offset for proper clamping. 
+ if (col < 0 && (width = tex->GetWidth()) != (1 << tex->WidthBits)) + { + col = width + (col % width); + } + + if (r_swtruecolor) + return (const uint8_t *)tex->GetColumnBgra(col, nullptr); + else + return tex->GetColumn(col, nullptr); + } + + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()) + { + if (colfunc == R_DrawAddColumn) + { + *tmvline1 = tmvline1_add; + *tmvline4 = tmvline4_add; + return true; + } + if (colfunc == R_DrawAddClampColumn) + { + *tmvline1 = tmvline1_addclamp; + *tmvline4 = tmvline4_addclamp; + return true; + } + if (colfunc == R_DrawSubClampColumn) + { + *tmvline1 = tmvline1_subclamp; + *tmvline4 = tmvline4_subclamp; + return true; + } + if (colfunc == R_DrawRevSubClampColumn) + { + *tmvline1 = tmvline1_revsubclamp; + *tmvline4 = tmvline4_revsubclamp; + return true; + } + return false; + } + + void setupvline(int fracbits) + { + drawerargs::vlinebits = fracbits; + } + + void setupmvline(int fracbits) + { + drawerargs::mvlinebits = fracbits; + } + + void setuptmvline(int fracbits) + { + drawerargs::tmvlinebits = fracbits; + } + + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + using namespace drawerargs; + + dc_fcolormap = base_colormap; + if (r_swtruecolor) + { + dc_shade_constants.light_red = dc_fcolormap->Color.r * 256 / 255; + dc_shade_constants.light_green = dc_fcolormap->Color.g * 256 / 255; + dc_shade_constants.light_blue = dc_fcolormap->Color.b * 256 / 255; + dc_shade_constants.light_alpha = dc_fcolormap->Color.a * 256 / 255; + dc_shade_constants.fade_red = dc_fcolormap->Fade.r; + dc_shade_constants.fade_green = dc_fcolormap->Fade.g; + dc_shade_constants.fade_blue = dc_fcolormap->Fade.b; + dc_shade_constants.fade_alpha = dc_fcolormap->Fade.a; + dc_shade_constants.desaturate = MIN(abs(dc_fcolormap->Desaturate), 255) * 255 / 256; + dc_shade_constants.simple_shade = (dc_fcolormap->Color.d == 0x00ffffff && dc_fcolormap->Fade.d == 0x00000000 && dc_fcolormap->Desaturate == 0); + 
dc_colormap = base_colormap->Maps; + dc_light = LIGHTSCALE(light, shade); + } + else + { + dc_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade) + { + using namespace drawerargs; + + ds_fcolormap = base_colormap; + if (r_swtruecolor) + { + ds_shade_constants.light_red = ds_fcolormap->Color.r * 256 / 255; + ds_shade_constants.light_green = ds_fcolormap->Color.g * 256 / 255; + ds_shade_constants.light_blue = ds_fcolormap->Color.b * 256 / 255; + ds_shade_constants.light_alpha = ds_fcolormap->Color.a * 256 / 255; + ds_shade_constants.fade_red = ds_fcolormap->Fade.r; + ds_shade_constants.fade_green = ds_fcolormap->Fade.g; + ds_shade_constants.fade_blue = ds_fcolormap->Fade.b; + ds_shade_constants.fade_alpha = ds_fcolormap->Fade.a; + ds_shade_constants.desaturate = MIN(abs(ds_fcolormap->Desaturate), 255) * 255 / 256; + ds_shade_constants.simple_shade = (ds_fcolormap->Color.d == 0x00ffffff && ds_fcolormap->Fade.d == 0x00000000 && ds_fcolormap->Desaturate == 0); + ds_colormap = base_colormap->Maps; + ds_light = LIGHTSCALE(light, shade); + } + else + { + ds_colormap = base_colormap->Maps + (GETPALOOKUP(light, shade) << COLORMAPSHIFT); + } + } + + void R_SetTranslationMap(lighttable_t *translation) + { + using namespace drawerargs; + + if (r_swtruecolor) + { + dc_fcolormap = nullptr; + dc_colormap = nullptr; + dc_translation = translation; + dc_shade_constants.light_red = 256; + dc_shade_constants.light_green = 256; + dc_shade_constants.light_blue = 256; + dc_shade_constants.light_alpha = 256; + dc_shade_constants.fade_red = 0; + dc_shade_constants.fade_green = 0; + dc_shade_constants.fade_blue = 0; + dc_shade_constants.fade_alpha = 256; + dc_shade_constants.desaturate = 0; + dc_shade_constants.simple_shade = true; + dc_light = 0; + } + else + { + dc_fcolormap = nullptr; + dc_colormap = translation; + } + } + + void rt_initcols(uint8_t *buffer) + { + using 
namespace drawerargs; + + for (int y = 3; y >= 0; y--) + horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; + + DrawerCommandQueue::QueueCommand(buffer); + } + + void rt_span_coverage(int x, int start, int stop) + { + using namespace drawerargs; + + unsigned int **tspan = &dc_ctspan[x & 3]; + (*tspan)[0] = start; + (*tspan)[1] = stop; + *tspan += 2; + } + + void rt_flip_posts() + { + using namespace drawerargs; + + unsigned int *front = horizspan[dc_x & 3]; + unsigned int *back = dc_ctspan[dc_x & 3] - 2; + + while (front < back) + { + swapvalues(front[0], back[0]); + swapvalues(front[1], back[1]); + front += 2; + back -= 2; + } + } + + void rt_draw4cols(int sx) + { + using namespace drawerargs; + + int x, bad; + unsigned int maxtop, minbot, minnexttop; + + // Place a dummy "span" in each column. These don't get + // drawn. They're just here to avoid special cases in the + // max/min calculations below. + for (x = 0; x < 4; ++x) + { + dc_ctspan[x][0] = screen->GetHeight()+1; + dc_ctspan[x][1] = screen->GetHeight(); + } + + for (;;) + { + // If a column is out of spans, mark it as such + bad = 0; + minnexttop = 0xffffffff; + for (x = 0; x < 4; ++x) + { + if (horizspan[x] >= dc_ctspan[x]) + { + bad |= 1 << x; + } + else if ((horizspan[x]+2)[0] < minnexttop) + { + minnexttop = (horizspan[x]+2)[0]; + } + } + // Once all columns are out of spans, we're done + if (bad == 15) + { + return; + } + + // Find the largest shared area for the spans in each column + maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), + MAX (horizspan[2][0], horizspan[3][0])); + minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), + MIN (horizspan[2][1], horizspan[3][1])); + + // If there is no shared area with these spans, draw each span + // individually and advance to the next spans until we reach a shared area. + // However, only draw spans down to the highest span in the next set of + // spans. 
If we allow the entire height of a span to be drawn, it could + // prevent any more shared areas from being drawn in these four columns. + // + // Example: Suppose we have the following arrangement: + // A CD + // A CD + // B D + // B D + // aB D + // aBcD + // aBcD + // aBc + // + // If we draw the entire height of the spans, we end up drawing this first: + // A CD + // A CD + // B D + // B D + // B D + // B D + // B D + // B D + // B + // + // This leaves only the "a" and "c" columns to be drawn, and they are not + // part of a shared area, but if we can include B and D with them, we can + // get a shared area. So we cut off everything in the first set just + // above the "a" column and end up drawing this first: + // A CD + // A CD + // B D + // B D + // + // Then the next time through, we have the following arrangement with an + // easily shared area to draw: + // aB D + // aBcD + // aBcD + // aBc + if (bad != 0 || maxtop > minbot) + { + int drawcount = 0; + for (x = 0; x < 4; ++x) + { + if (!(bad & 1)) + { + if (horizspan[x][1] < minnexttop) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); + horizspan[x] += 2; + drawcount++; + } + else if (minnexttop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); + horizspan[x][0] = minnexttop; + drawcount++; + } + } + bad >>= 1; + } + // Drawcount *should* always be non-zero. The reality is that some situations + // can make this not true. Unfortunately, I'm not sure what those situations are. + if (drawcount == 0) + { + return; + } + continue; + } + + // Draw any span fragments above the shared area. + for (x = 0; x < 4; ++x) + { + if (maxtop > horizspan[x][0]) + { + hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); + } + } + + // Draw the shared area. + hcolfunc_post4 (sx, maxtop, minbot); + + // For each column, if part of the span is past the shared area, + // set its top to just below the shared area. Otherwise, advance + // to the next span in that column. 
+ for (x = 0; x < 4; ++x) + { + if (minbot < horizspan[x][1]) + { + horizspan[x][0] = minbot+1; + } + else + { + horizspan[x] += 2; + } + } + } + } + + void R_SetupSpanBits(FTexture *tex) + { + using namespace drawerargs; + + tex->GetWidth(); + ds_xbits = tex->WidthBits; + ds_ybits = tex->HeightBits; + if ((1 << ds_xbits) > tex->GetWidth()) + { + ds_xbits--; + } + if ((1 << ds_ybits) > tex->GetHeight()) + { + ds_ybits--; + } + } + + void R_SetSpanColormap(FDynamicColormap *colormap, int shade) + { + R_SetDSColorMapLight(colormap, 0, shade); + } + + void R_SetSpanSource(FTexture *tex) + { + using namespace drawerargs; + + ds_source = r_swtruecolor ? (const uint8_t*)tex->GetPixelsBgra() : tex->GetPixels(); + ds_source_mipmapped = tex->Mipmapped() && tex->GetWidth() > 1 && tex->GetHeight() > 1; + } + + ///////////////////////////////////////////////////////////////////////// + + void R_FillColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawColumnHoriz() + { + using namespace drawerargs; + + if (dc_count <= 0) + return; + + int x = dc_x & 3; + unsigned int **span = &dc_ctspan[x]; + (*span)[0] = dc_yl; + (*span)[1] = dc_yh; + *span += 2; + + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); + } + + // Copies one span at hx to the screen at sx. + void rt_copy1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Copies all four spans to the screen starting at sx. + void rt_copy4cols(int sx, int yl, int yh) + { + // To do: we could do this with SSE using __m128i + rt_copy1col(0, sx, yl, yh); + rt_copy1col(1, sx + 1, yl, yh); + rt_copy1col(2, sx + 2, yl, yh); + rt_copy1col(3, sx + 3, yl, yh); + } + + // Maps one span at hx to the screen at sx. 
+ /* Column wrappers rt_map1col through rt_addclamp1col: each one forwards its arguments unchanged to DrawerCommandQueue::QueueCommand; the 1col variants pass (hx, sx, yl, yh) and the 4cols variants pass 0 in the hx position. NOTE(review): QueueCommand carries no explicit template argument in this text, so the command type each wrapper queues cannot be read from here - confirm against the repository before relying on it. */ void rt_map1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Maps all four spans to the screen starting at sx. + void rt_map4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates one span at hx to the screen at sx. + void rt_tlate1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates all four spans to the screen starting at sx. + void rt_tlate4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx without clamping. + void rt_add1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx without clamping. + void rt_add4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx without clamping. + void rt_tlateadd1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx without clamping. + void rt_tlateadd4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Shades one span at hx to the screen at sx. + void rt_shaded1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Shades all four spans to the screen starting at sx. + void rt_shaded4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Adds one span at hx to the screen at sx with clamping. + void rt_addclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Adds all four spans to the screen starting at sx with clamping.
+ /* Column wrappers rt_addclamp4cols through rt_tlaterevsubclamp1col: each one forwards its arguments unchanged to DrawerCommandQueue::QueueCommand; the 1col variants pass (hx, sx, yl, yh) and the 4cols variants pass 0 in the hx position. NOTE(review): QueueCommand carries no explicit template argument in this text, so the command type each wrapper queues cannot be read from here - confirm against the repository before relying on it. */ void rt_addclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and adds one span at hx to the screen at sx with clamping. + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and adds all four spans to the screen starting at sx with clamping. + void rt_tlateaddclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Subtracts one span at hx to the screen at sx with clamping. + void rt_subclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans to the screen starting at sx with clamping. + void rt_subclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx to the screen at sx with clamping. + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans to the screen starting at sx with clamping. + void rt_tlatesubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Subtracts one span at hx from the screen at sx with clamping. + void rt_revsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Subtracts all four spans from the screen starting at sx with clamping. + void rt_revsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + // Translates and subtracts one span at hx from the screen at sx with clamping. + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + } + + // Translates and subtracts all four spans from the screen starting at sx with clamping.
+ void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) + { + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); + } + + uint32_t vlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void vlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + uint32_t mvlinec1() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void mvlinec4() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_add() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_addclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_subclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i = 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + fixed_t tmvline1_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + return dc_texturefrac + dc_count * dc_iscale; + } + + void tmvline4_revsubclamp() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + for (int i 
= 0; i < 4; i++) + vplce[i] += vince[i] * dc_count; + } + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) + { + DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); + } + + void R_DrawColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillRevSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawFuzzColumn() + { + using namespace drawerargs; + + DrawerCommandQueue::QueueCommand(); + + dc_yl = MAX(dc_yl, 1); + dc_yh = MIN(dc_yh, fuzzviewheight); + if (dc_yl <= dc_yh) + fuzzpos = (fuzzpos + dc_yh - dc_yl + 1) % FUZZTABLE; + } + + void R_DrawAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTlatedAddColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawShadedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawAddClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampColumn() + { + 
DrawerCommandQueue::QueueCommand(); + } + + void R_DrawRevSubClampTranslatedColumn() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMasked() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedTranslucent() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawSpanMaskedAddClamp() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_FillSpan() + { + DrawerCommandQueue::QueueCommand(); + } + + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy) + { + DrawerCommandQueue::QueueCommand(y, x1, x2, plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + } + + void R_DrawColoredSpan(int y, int x1, int x2) + { + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + + namespace + { + ShadeConstants slab_rgba_shade_constants; + const uint8_t *slab_rgba_colormap; + fixed_t slab_rgba_light; + } + + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade) + { + slab_rgba_shade_constants.light_red = base_colormap->Color.r * 256 / 255; + slab_rgba_shade_constants.light_green = base_colormap->Color.g * 256 / 255; + slab_rgba_shade_constants.light_blue = base_colormap->Color.b * 256 / 255; + slab_rgba_shade_constants.light_alpha = base_colormap->Color.a * 256 / 255; + slab_rgba_shade_constants.fade_red = base_colormap->Fade.r; + slab_rgba_shade_constants.fade_green = base_colormap->Fade.g; + slab_rgba_shade_constants.fade_blue = base_colormap->Fade.b; + slab_rgba_shade_constants.fade_alpha = base_colormap->Fade.a; + slab_rgba_shade_constants.desaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256; + 
slab_rgba_shade_constants.simple_shade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0); + slab_rgba_colormap = base_colormap->Maps; + slab_rgba_light = LIGHTSCALE(light, shade); + } + + void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p) + { + DrawerCommandQueue::QueueCommand(dx, v, dy, vi, vptr, p, slab_rgba_shade_constants, slab_rgba_colormap, slab_rgba_light); + } + + void R_DrawFogBoundarySection(int y, int y2, int x1) + { + for (; y < y2; ++y) + { + int x2 = spanend[y]; + DrawerCommandQueue::QueueCommand(y, x1, x2); + } + } + + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip) + { + // This is essentially the same as R_MapVisPlane but with an extra step + // to create new horizontal spans whenever the light changes enough that + // we need to use a new colormap. + + double lightstep = rw_lightstep; + double light = rw_light + rw_lightstep*(x2 - x1 - 1); + int x = x2 - 1; + int t2 = uclip[x]; + int b2 = dclip[x]; + int rcolormap = GETPALOOKUP(light, wallshade); + int lcolormap; + uint8_t *basecolormapdata = basecolormap->Maps; + + if (b2 > t2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + + R_SetColorMapLight(basecolormap, (float)light, wallshade); + + uint8_t *fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + + for (--x; x >= x1; --x) + { + int t1 = uclip[x]; + int b1 = dclip[x]; + const int xr = x + 1; + int stop; + + light -= rw_lightstep; + lcolormap = GETPALOOKUP(light, wallshade); + if (lcolormap != rcolormap) + { + if (t2 < b2 && rcolormap != 0) + { // Colormap 0 is always the identity map, so rendering it is + // just a waste of time. 
+ R_DrawFogBoundarySection(t2, b2, xr); + } + if (t1 < t2) t2 = t1; + if (b1 > b2) b2 = b1; + if (t2 < b2) + { + clearbufshort(spanend + t2, b2 - t2, x); + } + rcolormap = lcolormap; + R_SetColorMapLight(basecolormap, (float)light, wallshade); + fake_dc_colormap = basecolormap->Maps + (GETPALOOKUP(light, wallshade) << COLORMAPSHIFT); + } + else + { + if (fake_dc_colormap != basecolormapdata) + { + stop = MIN(t1, b2); + while (t2 < stop) + { + int y = t2++; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + stop = MAX(b1, t2); + while (b2 > stop) + { + int y = --b2; + DrawerCommandQueue::QueueCommand(y, xr, spanend[y]); + } + } + else + { + t2 = MAX(t2, MIN(t1, b2)); + b2 = MIN(b2, MAX(b1, t2)); + } + + stop = MIN(t2, b1); + while (t1 < stop) + { + spanend[t1++] = x; + } + stop = MAX(b2, t2); + while (b1 > stop) + { + spanend[--b1] = x; + } + } + + t2 = uclip[x]; + b2 = dclip[x]; + } + if (t2 < b2 && rcolormap != 0) + { + R_DrawFogBoundarySection(t2, b2, x1); + } + } + + void R_DrawParticle(vissprite_t *sprite) + { + if (r_swtruecolor) + R_DrawParticle_rgba(sprite); + else + R_DrawParticle_C(sprite); + } +} diff --git a/src/r_draw_tc.h b/src/r_draw_tc.h new file mode 100644 index 0000000000..8c1af58fcb --- /dev/null +++ b/src/r_draw_tc.h @@ -0,0 +1,239 @@ + +#pragma once + +#include "r_defs.h" + +struct FSWColormap; + +EXTERN_CVAR(Bool, r_multithreaded); +EXTERN_CVAR(Bool, r_magfilter); +EXTERN_CVAR(Bool, r_minfilter); +EXTERN_CVAR(Bool, r_mipmap); +EXTERN_CVAR(Float, r_lod_bias); +EXTERN_CVAR(Int, r_drawfuzz); +EXTERN_CVAR(Bool, r_drawtrans); +EXTERN_CVAR(Float, transsouls); +EXTERN_CVAR(Int, r_columnmethod); + +namespace swrenderer +{ + struct vissprite_t; + + struct ShadeConstants + { + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + bool simple_shade; + }; + + extern double dc_texturemid; + + 
namespace drawerargs + { + extern int dc_pitch; + extern lighttable_t *dc_colormap; + extern FSWColormap *dc_fcolormap; + extern ShadeConstants dc_shade_constants; + extern fixed_t dc_light; + extern int dc_x; + extern int dc_yl; + extern int dc_yh; + extern fixed_t dc_iscale; + extern fixed_t dc_texturefrac; + extern uint32_t dc_textureheight; + extern int dc_color; + extern uint32_t dc_srccolor; + extern uint32_t dc_srccolor_bgra; + extern uint32_t *dc_srcblend; + extern uint32_t *dc_destblend; + extern fixed_t dc_srcalpha; + extern fixed_t dc_destalpha; + extern const uint8_t *dc_source; + extern const uint8_t *dc_source2; + extern uint32_t dc_texturefracx; + extern uint8_t *dc_translation; + extern uint8_t *dc_dest; + extern uint8_t *dc_destorg; + extern int dc_destheight; + extern int dc_count; + + extern bool drawer_needs_pal_input; + + extern uint32_t vplce[4]; + extern uint32_t vince[4]; + extern uint8_t *palookupoffse[4]; + extern fixed_t palookuplight[4]; + extern const uint8_t *bufplce[4]; + extern const uint8_t *bufplce2[4]; + extern uint32_t buftexturefracx[4]; + extern uint32_t bufheight[4]; + extern int vlinebits; + extern int mvlinebits; + extern int tmvlinebits; + + extern int ds_y; + extern int ds_x1; + extern int ds_x2; + extern lighttable_t * ds_colormap; + extern FSWColormap *ds_fcolormap; + extern ShadeConstants ds_shade_constants; + extern dsfixed_t ds_light; + extern dsfixed_t ds_xfrac; + extern dsfixed_t ds_yfrac; + extern dsfixed_t ds_xstep; + extern dsfixed_t ds_ystep; + extern int ds_xbits; + extern int ds_ybits; + extern fixed_t ds_alpha; + extern double ds_lod; + extern const uint8_t *ds_source; + extern bool ds_source_mipmapped; + extern int ds_color; + + extern unsigned int dc_tspans[4][MAXHEIGHT]; + extern unsigned int *dc_ctspan[4]; + extern unsigned int *horizspan[4]; + } + + extern int ylookup[MAXHEIGHT]; + extern uint8_t shadetables[/*NUMCOLORMAPS*16*256*/]; + extern FDynamicColormap ShadeFakeColormap[16]; + extern uint8_t 
identitymap[256]; + extern FDynamicColormap identitycolormap; + + // Spectre/Invisibility. + #define FUZZTABLE 50 + extern int fuzzoffset[FUZZTABLE + 1]; + extern int fuzzpos; + extern int fuzzviewheight; + + extern bool r_swtruecolor; + + void R_InitColumnDrawers(); + void R_InitShadeMaps(); + void R_InitFuzzTable(int fuzzoff); + + enum ESPSResult + { + DontDraw, // not useful to draw this + DoDraw0, // draw this as if r_columnmethod is 0 + DoDraw1, // draw this as if r_columnmethod is 1 + }; + + ESPSResult R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); + ESPSResult R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); + void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade + bool R_GetTransMaskDrawers(fixed_t(**tmvline1)(), void(**tmvline4)()); + + const uint8_t *R_GetColumn(FTexture *tex, int col); + void wallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void maskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + void transmaskwallscan(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const uint8_t *(*getcol)(FTexture *tex, int col) = R_GetColumn); + + void rt_initcols(uint8_t *buffer = nullptr); + void rt_span_coverage(int x, int start, int stop); + void rt_draw4cols(int sx); + void rt_flip_posts(); + void rt_copy1col(int hx, int sx, int yl, int yh); + void rt_copy4cols(int sx, int yl, int yh); + void rt_shaded1col(int hx, int sx, int yl, int yh); + void rt_shaded4cols(int sx, int yl, int yh); + void rt_map1col(int hx, int sx, int yl, int yh); + void rt_add1col(int hx, int sx, int yl, int yh); + void rt_addclamp1col(int hx, int sx, int yl, int yh); + void rt_subclamp1col(int 
hx, int sx, int yl, int yh); + void rt_revsubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlate1col(int hx, int sx, int yl, int yh); + void rt_tlateadd1col(int hx, int sx, int yl, int yh); + void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); + void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); + void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); + void rt_map4cols(int sx, int yl, int yh); + void rt_add4cols(int sx, int yl, int yh); + void rt_addclamp4cols(int sx, int yl, int yh); + void rt_subclamp4cols(int sx, int yl, int yh); + void rt_revsubclamp4cols(int sx, int yl, int yh); + void rt_tlate4cols(int sx, int yl, int yh); + void rt_tlateadd4cols(int sx, int yl, int yh); + void rt_tlateaddclamp4cols(int sx, int yl, int yh); + void rt_tlatesubclamp4cols(int sx, int yl, int yh); + void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); + void R_DrawColumnHoriz(); + void R_DrawColumn(); + void R_DrawFuzzColumn(); + void R_DrawTranslatedColumn(); + void R_DrawShadedColumn(); + void R_FillColumn(); + void R_FillAddColumn(); + void R_FillAddClampColumn(); + void R_FillSubClampColumn(); + void R_FillRevSubClampColumn(); + void R_DrawAddColumn(); + void R_DrawTlatedAddColumn(); + void R_DrawAddClampColumn(); + void R_DrawAddClampTranslatedColumn(); + void R_DrawSubClampColumn(); + void R_DrawSubClampTranslatedColumn(); + void R_DrawRevSubClampColumn(); + void R_DrawRevSubClampTranslatedColumn(); + void R_DrawSpan(); + void R_DrawSpanMasked(); + void R_DrawSpanTranslucent(); + void R_DrawSpanMaskedTranslucent(); + void R_DrawSpanAddClamp(); + void R_DrawSpanMaskedAddClamp(); + void R_FillSpan(); + void R_DrawTiltedSpan(int y, int x1, int x2, const FVector3 &plane_sz, const FVector3 &plane_su, const FVector3 &plane_sv, bool plane_shade, int planeshade, float planelightfloat, fixed_t pviewx, fixed_t pviewy); + void R_DrawColoredSpan(int y, int x1, int x2); + void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); + void 
R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); + void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); + uint32_t vlinec1(); + void vlinec4(); + uint32_t mvlinec1(); + void mvlinec4(); + fixed_t tmvline1_add(); + void tmvline4_add(); + fixed_t tmvline1_addclamp(); + void tmvline4_addclamp(); + fixed_t tmvline1_subclamp(); + void tmvline4_subclamp(); + fixed_t tmvline1_revsubclamp(); + void tmvline4_revsubclamp(); + void R_FillColumnHoriz(); + void R_FillSpan(); + + inline uint32_t dovline1() { return vlinec1(); } + inline void dovline4() { vlinec4(); } + inline uint32_t domvline1() { return mvlinec1(); } + inline void domvline4() { mvlinec4(); } + + void setupvline(int fracbits); + void setupmvline(int fracbits); + void setuptmvline(int fracbits); + + void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + + // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) + void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); + void R_SetDSColorMapLight(FSWColormap *base_colormap, float light, int shade); + void R_SetTranslationMap(lighttable_t *translation); + + void R_SetupSpanBits(FTexture *tex); + void R_SetSpanColormap(FDynamicColormap *colormap, int shade); + void R_SetSpanSource(FTexture *tex); + + void R_MapTiltedPlane(int y, int x1); + void R_MapColoredPlane(int y, int x1); + void R_DrawParticle(vissprite_t *); +} diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 539135afe9..d5aeed8a91 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -1,37 +1,23 @@ /* -** r_drawt_rgba.cpp -** Faster column drawers for modern processors, true color edition +** Drawer commands for the RT family of 
drawers +** Copyright (c) 2016 Magnus Norddahl ** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** All rights reserved. +** This software is provided 'as-is', without any express or implied +** warranty. In no event will the authors be held liable for any damages +** arising from the use of this software. ** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: +** Permission is granted to anyone to use this software for any purpose, +** including commercial applications, and to alter it and redistribute it +** freely, subject to the following restrictions: ** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. +** 1. The origin of this software must not be misrepresented; you must not +** claim that you wrote the original software. If you use this software +** in a product, an acknowledgment in the product documentation would be +** appreciated but is not required. +** 2. Altered source versions must be plainly marked as such, and must not be +** misrepresented as being the original software. +** 3. This notice may not be removed or altered from any source distribution. ** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
-** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**--------------------------------------------------------------------------- -** -** True color versions of the similar functions in r_drawt.cpp -** Please see r_drawt.cpp for a description of the globals used. */ #include "templates.h" @@ -47,15 +33,7 @@ namespace swrenderer { - -///////////////////////////////////////////////////////////////////////////// - -class DrawColumnRt1LLVMCommand : public DrawerCommand -{ -protected: - DrawColumnArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread) + WorkerThreadData DrawColumnRt1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; d.core = thread->core; @@ -66,8 +44,7 @@ protected: return d; } -public: - DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) + DrawColumnRt1LLVMCommand::DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) { using namespace drawerargs; @@ -105,90 +82,38 @@ public: DetectRangeError(args.dest, args.dest_y, args.count); } - void Execute(DrawerThread *thread) override + void DrawColumnRt1LLVMCommand::Execute(DrawerThread *thread) { WorkerThreadData d = ThreadData(thread); Drawers::Instance()->DrawColumnRt1(&args, &d); } - FString DebugInfo() override + FString DrawColumnRt1LLVMCommand::DebugInfo() { return "DrawColumnRt\n" + args.ToString(); } -}; -#define DECLARE_DRAW_COMMAND(name, func, base) \ -class name##LLVMCommand : public base \ -{ \ -public: \ - using base::base; \ - void Execute(DrawerThread *thread) override \ - { \ - WorkerThreadData d = 
ThreadData(thread); \ - Drawers::Instance()->func(&args, &d); \ - } \ -}; + ///////////////////////////////////////////////////////////////////////////// -DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); 
-DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); -DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); - -///////////////////////////////////////////////////////////////////////////// - -class RtInitColsRGBACommand : public DrawerCommand -{ - BYTE * RESTRICT buff; - -public: - RtInitColsRGBACommand(BYTE *buff) + RtInitColsRGBACommand::RtInitColsRGBACommand(BYTE *buff) { this->buff = buff; } - void Execute(DrawerThread *thread) override + void RtInitColsRGBACommand::Execute(DrawerThread *thread) { thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; } - FString DebugInfo() override + FString RtInitColsRGBACommand::DebugInfo() { return "RtInitCols"; } -}; -template -class DrawColumnHorizRGBACommand : public DrawerCommand -{ - int _count; - fixed_t _iscale; - fixed_t _texturefrac; - const InputPixelType * RESTRICT _source; - int _x; - int _yl; - int _yh; + ///////////////////////////////////////////////////////////////////////////// -public: - DrawColumnHorizRGBACommand() + template + DrawColumnHorizRGBACommand::DrawColumnHorizRGBACommand() { using namespace drawerargs; @@ -201,7 +126,8 @@ public: _yh = dc_yh; } - void Execute(DrawerThread *thread) override + template + void DrawColumnHorizRGBACommand::Execute(DrawerThread *thread) { int count = _count; uint32_t *dest; @@ -252,22 +178,19 @@ public: } while (--count); } - FString DebugInfo() override + template + FString DrawColumnHorizRGBACommand::DebugInfo() { return "DrawColumnHoriz"; } -}; -class FillColumnHorizRGBACommand : public DrawerCommand -{ - int _x; - int _yl; - int _yh; - int _count; - uint32_t _color; + // Generate code for the 
versions we use: + template class DrawColumnHorizRGBACommand; + template class DrawColumnHorizRGBACommand; -public: - FillColumnHorizRGBACommand() + ///////////////////////////////////////////////////////////////////////////// + + FillColumnHorizRGBACommand::FillColumnHorizRGBACommand() { using namespace drawerargs; @@ -278,7 +201,7 @@ public: _yh = dc_yh; } - void Execute(DrawerThread *thread) override + void FillColumnHorizRGBACommand::Execute(DrawerThread *thread) { int count = _count; uint32_t color = _color; @@ -304,220 +227,8 @@ public: } while (--count); } - FString DebugInfo() override + FString FillColumnHorizRGBACommand::DebugInfo() { return "FillColumnHoriz"; } -}; - -///////////////////////////////////////////////////////////////////////////// - -// Copies one span at hx to the screen at sx. -void rt_copy1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Copies all four spans to the screen starting at sx. -void rt_copy4cols_rgba (int sx, int yl, int yh) -{ - // To do: we could do this with SSE using __m128i - rt_copy1col_rgba(0, sx, yl, yh); - rt_copy1col_rgba(1, sx + 1, yl, yh); - rt_copy1col_rgba(2, sx + 2, yl, yh); - rt_copy1col_rgba(3, sx + 3, yl, yh); -} - -// Maps one span at hx to the screen at sx. -void rt_map1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Maps all four spans to the screen starting at sx. -void rt_map4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates one span at hx to the screen at sx. -void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates all four spans to the screen starting at sx. -void rt_tlate4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Adds one span at hx to the screen at sx without clamping. 
-void rt_add1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Adds all four spans to the screen starting at sx without clamping. -void rt_add4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and adds one span at hx to the screen at sx without clamping. -void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx without clamping. -void rt_tlateadd4cols_rgba(int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Shades one span at hx to the screen at sx. -void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Shades all four spans to the screen starting at sx. -void rt_shaded4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Adds one span at hx to the screen at sx with clamping. -void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Adds all four spans to the screen starting at sx with clamping. -void rt_addclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and adds one span at hx to the screen at sx with clamping. -void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and adds all four spans to the screen starting at sx with clamping. -void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Subtracts one span at hx to the screen at sx with clamping. 
-void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Subtracts all four spans to the screen starting at sx with clamping. -void rt_subclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and subtracts one span at hx to the screen at sx with clamping. -void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans to the screen starting at sx with clamping. -void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Subtracts one span at hx from the screen at sx with clamping. -void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Subtracts all four spans from the screen starting at sx with clamping. -void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Translates and subtracts one span at hx from the screen at sx with clamping. -void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); -} - -// Translates and subtracts all four spans from the screen starting at sx with clamping. -void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); -} - -// Before each pass through a rendering loop that uses these routines, -// call this function to set up the span pointers. 
-void rt_initcols_rgba (BYTE *buff) -{ - using namespace drawerargs; - - for (int y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - - DrawerCommandQueue::QueueCommand(buff); -} - -void rt_span_coverage_rgba(int x, int start, int stop) -{ - using namespace drawerargs; - - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = start; - (*tspan)[1] = stop; - *tspan += 2; -} - -// Stretches a column into a temporary buffer which is later -// drawn to the screen along with up to three other columns. -void R_DrawColumnHoriz_rgba (void) -{ - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (drawer_needs_pal_input) - DrawerCommandQueue::QueueCommand>(); - else - DrawerCommandQueue::QueueCommand>(); -} - -// [RH] Just fills a column with a given color -void R_FillColumnHoriz_rgba (void) -{ - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - DrawerCommandQueue::QueueCommand(); -} - } diff --git a/src/r_plane.cpp b/src/r_plane.cpp index 36c2c1da5d..c6c7d6d2f0 100644 --- a/src/r_plane.cpp +++ b/src/r_plane.cpp @@ -511,9 +511,9 @@ void R_MapTiltedPlane_C (int y, int x1) #endif } -void R_MapTiltedPlane_rgba (int y, int x1) +void R_MapTiltedPlane (int y, int x1) { - R_DrawTiltedSpan_rgba(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); + R_DrawTiltedSpan(y, x1, spanend[y], plane_sz, plane_su, plane_sv, plane_shade, planeshade, planelightfloat, pviewx, pviewy); } //========================================================================== @@ -527,9 +527,9 @@ void R_MapColoredPlane_C (int y, int x1) memset (ylookup[y] + x1 + dc_destorg, ds_color, spanend[y] - x1 + 1); } -void R_MapColoredPlane_rgba(int y, int x1) +void 
R_MapColoredPlane(int y, int x1) { - R_DrawColoredSpan_rgba(y, x1, spanend[y]); + R_DrawColoredSpan(y, x1, spanend[y]); } //========================================================================== @@ -1073,32 +1073,16 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - if (r_swtruecolor) - { - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4_rgba(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4_rgba(solid_top, solid_bottom); + if (columns == 4) + if (!backskytex) + R_DrawSingleSkyCol4(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1_rgba(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1_rgba(solid_top, solid_bottom); - } + R_DrawDoubleSkyCol4(solid_top, solid_bottom); else - { - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyCol1(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1(solid_top, solid_bottom); - } + R_DrawDoubleSkyCol1(solid_top, solid_bottom); } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns)