diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index aa67b13d1..ede6c9a52 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -818,9 +818,7 @@ set( FASTMATH_PCH_SOURCES r_bsp.cpp r_draw.cpp r_draw_pal.cpp - r_drawt_pal.cpp r_draw_rgba.cpp - r_drawt_rgba.cpp r_drawers.cpp r_thread.cpp r_main.cpp diff --git a/src/r_draw.cpp b/src/r_draw.cpp index c7bf95eed..f629c027d 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -147,9 +147,6 @@ namespace swrenderer fuzzcolfunc = R_DrawFuzzColumn; transcolfunc = R_DrawTranslatedColumn; spanfunc = R_DrawSpan; - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } void R_InitShadeMaps() @@ -233,20 +230,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillColumn; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; } else if (dc_translation == NULL) { colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; } else { colfunc = transcolfunc; - hcolfunc_post1 = rt_tlate1col; - hcolfunc_post4 = rt_tlate4cols; drawer_needs_pal_input = true; } return true; @@ -284,20 +275,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; } else if (dc_translation == NULL) { colfunc = R_DrawAddColumn; - hcolfunc_post1 = rt_add1col; - hcolfunc_post4 = rt_add4cols; } else { colfunc = R_DrawTlatedAddColumn; - hcolfunc_post1 = rt_tlateadd1col; - hcolfunc_post4 = rt_tlateadd4cols; drawer_needs_pal_input = true; } } @@ -306,20 +291,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawAddClampColumn; - hcolfunc_post1 = rt_addclamp1col; - hcolfunc_post4 = rt_addclamp4cols; } else { colfunc = R_DrawAddClampTranslatedColumn; - hcolfunc_post1 = rt_tlateaddclamp1col; - hcolfunc_post4 = rt_tlateaddclamp4cols; drawer_needs_pal_input = true; } } @@ -329,20 +308,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else { colfunc = R_DrawSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlatesubclamp1col; - hcolfunc_post4 = rt_tlatesubclamp4cols; drawer_needs_pal_input = true; } return true; @@ -355,20 +328,14 @@ namespace swrenderer if (flags & STYLEF_ColorIsFixed) { colfunc = R_FillRevSubClampColumn; - hcolfunc_post1 = rt_subclamp1col; - hcolfunc_post4 = rt_subclamp4cols; } else if (dc_translation == NULL) { colfunc = R_DrawRevSubClampColumn; - hcolfunc_post1 = rt_revsubclamp1col; - hcolfunc_post4 = rt_revsubclamp4cols; } else { colfunc = R_DrawRevSubClampTranslatedColumn; - hcolfunc_post1 = rt_tlaterevsubclamp1col; - hcolfunc_post4 = rt_tlaterevsubclamp4cols; drawer_needs_pal_input = true; } return true; @@ -439,7 +406,6 @@ namespace swrenderer } } basecolormapsave = basecolormap; - hcolfunc_pre = R_DrawColumnHoriz; // Check for special modes if (style.BlendOp == STYLEOP_Fuzz) @@ -453,8 +419,6 @@ namespace swrenderer if ((alpha >>= 12) == 0) return false; colfunc = R_DrawShadedColumn; - hcolfunc_post1 = rt_shaded1col; - hcolfunc_post4 = rt_shaded4cols; drawer_needs_pal_input = true; dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16 - alpha]; @@ -491,7 +455,6 @@ namespace swrenderer // dc_srccolor is used by the R_Fill* routines. It is premultiplied // with the alpha. dc_srccolor = ((((r*x) >> 4) << 20) | ((g*x) >> 4) | ((((b)*x) >> 4) << 10)) & 0x3feffbff; - hcolfunc_pre = R_FillColumnHoriz; R_SetColorMapLight(&identitycolormap, 0, 0); } @@ -638,191 +601,6 @@ namespace swrenderer } } - void rt_initcols(uint8_t *buffer) - { - using namespace drawerargs; - - for (int y = 3; y >= 0; y--) - horizspan[y] = dc_ctspan[y] = &dc_tspans[y][0]; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(buffer); - else - DrawerCommandQueue::QueueCommand(buffer); - } - - void rt_span_coverage(int x, int start, int stop) - { - using namespace drawerargs; - - unsigned int **tspan = &dc_ctspan[x & 3]; - (*tspan)[0] = start; - (*tspan)[1] = stop; - *tspan += 2; - } - - void rt_flip_posts() - { - using namespace drawerargs; - - unsigned int *front = horizspan[dc_x & 3]; - unsigned int *back = dc_ctspan[dc_x & 3] - 2; - - while (front < back) - { - swapvalues(front[0], back[0]); - swapvalues(front[1], back[1]); - front += 2; - back -= 2; - } - } - - void rt_draw4cols(int sx) - { - using namespace drawerargs; - - int x, bad; - unsigned int maxtop, minbot, minnexttop; - - // Place a dummy "span" in each column. These don't get - // drawn. They're just here to avoid special cases in the - // max/min calculations below. - for (x = 0; x < 4; ++x) - { - dc_ctspan[x][0] = screen->GetHeight()+1; - dc_ctspan[x][1] = screen->GetHeight(); - } - - for (;;) - { - // If a column is out of spans, mark it as such - bad = 0; - minnexttop = 0xffffffff; - for (x = 0; x < 4; ++x) - { - if (horizspan[x] >= dc_ctspan[x]) - { - bad |= 1 << x; - } - else if ((horizspan[x]+2)[0] < minnexttop) - { - minnexttop = (horizspan[x]+2)[0]; - } - } - // Once all columns are out of spans, we're done - if (bad == 15) - { - return; - } - - // Find the largest shared area for the spans in each column - maxtop = MAX (MAX (horizspan[0][0], horizspan[1][0]), - MAX (horizspan[2][0], horizspan[3][0])); - minbot = MIN (MIN (horizspan[0][1], horizspan[1][1]), - MIN (horizspan[2][1], horizspan[3][1])); - - // If there is no shared area with these spans, draw each span - // individually and advance to the next spans until we reach a shared area. - // However, only draw spans down to the highest span in the next set of - // spans. If we allow the entire height of a span to be drawn, it could - // prevent any more shared areas from being drawn in these four columns. - // - // Example: Suppose we have the following arrangement: - // A CD - // A CD - // B D - // B D - // aB D - // aBcD - // aBcD - // aBc - // - // If we draw the entire height of the spans, we end up drawing this first: - // A CD - // A CD - // B D - // B D - // B D - // B D - // B D - // B D - // B - // - // This leaves only the "a" and "c" columns to be drawn, and they are not - // part of a shared area, but if we can include B and D with them, we can - // get a shared area. So we cut off everything in the first set just - // above the "a" column and end up drawing this first: - // A CD - // A CD - // B D - // B D - // - // Then the next time through, we have the following arrangement with an - // easily shared area to draw: - // aB D - // aBcD - // aBcD - // aBc - if (bad != 0 || maxtop > minbot) - { - int drawcount = 0; - for (x = 0; x < 4; ++x) - { - if (!(bad & 1)) - { - if (horizspan[x][1] < minnexttop) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], horizspan[x][1]); - horizspan[x] += 2; - drawcount++; - } - else if (minnexttop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], minnexttop-1); - horizspan[x][0] = minnexttop; - drawcount++; - } - } - bad >>= 1; - } - // Drawcount *should* always be non-zero. The reality is that some situations - // can make this not true. Unfortunately, I'm not sure what those situations are. - if (drawcount == 0) - { - return; - } - continue; - } - - // Draw any span fragments above the shared area. - for (x = 0; x < 4; ++x) - { - if (maxtop > horizspan[x][0]) - { - hcolfunc_post1 (x, sx+x, horizspan[x][0], maxtop-1); - } - } - - // Draw the shared area. - hcolfunc_post4 (sx, maxtop, minbot); - - // For each column, if part of the span is past the shared area, - // set its top to just below the shared area. Otherwise, advance - // to the next span in that column. - for (x = 0; x < 4; ++x) - { - if (minbot < horizspan[x][1]) - { - horizspan[x][0] = minbot+1; - } - else - { - horizspan[x] += 2; - } - } - } - } - void R_SetupSpanBits(FTexture *tex) { using namespace drawerargs; @@ -855,325 +633,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void R_FillColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawColumnHoriz() - { - using namespace drawerargs; - - if (dc_count <= 0) - return; - - int x = dc_x & 3; - unsigned int **span = &dc_ctspan[x]; - (*span)[0] = dc_yl; - (*span)[1] = dc_yh; - *span += 2; - - if (r_swtruecolor) - { - if (drawer_needs_pal_input) - DrawerCommandQueue::QueueCommand>(); - else - DrawerCommandQueue::QueueCommand>(); - } - else - { - DrawerCommandQueue::QueueCommand(); - } - } - - // Copies one span at hx to the screen at sx. - void rt_copy1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Copies all four spans to the screen starting at sx. - void rt_copy4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - // To do: we could do this with SSE using __m128i - rt_copy1col(0, sx, yl, yh); - rt_copy1col(1, sx + 1, yl, yh); - rt_copy1col(2, sx + 2, yl, yh); - rt_copy1col(3, sx + 3, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - } - - // Maps one span at hx to the screen at sx. - void rt_map1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Maps all four spans to the screen starting at sx. - void rt_map4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates one span at hx to the screen at sx. - void rt_tlate1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_map1col(hx, sx, yl, yh); - } - } - - // Translates all four spans to the screen starting at sx. - void rt_tlate4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_map4cols(sx, yl, yh); - } - } - - // Adds one span at hx to the screen at sx without clamping. - void rt_add1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx without clamping. - void rt_add4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx without clamping. - void rt_tlateadd1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_add1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx without clamping. - void rt_tlateadd4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_add4cols(sx, yl, yh); - } - } - - // Shades one span at hx to the screen at sx. - void rt_shaded1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Shades all four spans to the screen starting at sx. - void rt_shaded4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Adds one span at hx to the screen at sx with clamping. - void rt_addclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Adds all four spans to the screen starting at sx with clamping. - void rt_addclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and adds one span at hx to the screen at sx with clamping. - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_addclamp1col(hx, sx, yl, yh); - } - } - - // Translates and adds all four spans to the screen starting at sx with clamping. - void rt_tlateaddclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_addclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx to the screen at sx with clamping. - void rt_subclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans to the screen starting at sx with clamping. - void rt_subclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx to the screen at sx with clamping. - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_subclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans to the screen starting at sx with clamping. - void rt_tlatesubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_subclamp4cols(sx, yl, yh); - } - } - - // Subtracts one span at hx from the screen at sx with clamping. - void rt_revsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - - // Subtracts all four spans from the screen starting at sx with clamping. - void rt_revsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - else - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - - // Translates and subtracts one span at hx from the screen at sx with clamping. - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); - rt_revsubclamp1col(hx, sx, yl, yh); - } - } - - // Translates and subtracts all four spans from the screen starting at sx with clamping. - void rt_tlaterevsubclamp4cols(int sx, int yl, int yh) - { - if (r_swtruecolor) - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - } - else - { - DrawerCommandQueue::QueueCommand(0, sx, yl, yh); - rt_revsubclamp4cols(sx, yl, yh); - } - } - void R_DrawWallCol1() { if (r_swtruecolor) diff --git a/src/r_draw.h b/src/r_draw.h index 752647943..c508aa268 100644 --- a/src/r_draw.h +++ b/src/r_draw.h @@ -134,35 +134,6 @@ namespace swrenderer const uint8_t *R_GetColumn(FTexture *tex, int col); - void rt_initcols(uint8_t *buffer = nullptr); - void rt_span_coverage(int x, int start, int stop); - void rt_draw4cols(int sx); - void rt_flip_posts(); - void rt_copy1col(int hx, int sx, int yl, int yh); - void rt_copy4cols(int sx, int yl, int yh); - void rt_shaded1col(int hx, int sx, int yl, int yh); - void rt_shaded4cols(int sx, int yl, int yh); - void rt_map1col(int hx, int sx, int yl, int yh); - void rt_add1col(int hx, int sx, int yl, int yh); - void rt_addclamp1col(int hx, int sx, int yl, int yh); - void rt_subclamp1col(int hx, int sx, int yl, int yh); - void rt_revsubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlate1col(int hx, int sx, int yl, int yh); - void rt_tlateadd1col(int hx, int sx, int yl, int yh); - void rt_tlateaddclamp1col(int hx, int sx, int yl, int yh); - void rt_tlatesubclamp1col(int hx, int sx, int yl, int yh); - void rt_tlaterevsubclamp1col(int hx, int sx, int yl, int yh); - void rt_map4cols(int sx, int yl, int yh); - void rt_add4cols(int sx, int yl, int yh); - void rt_addclamp4cols(int sx, int yl, int yh); - void rt_subclamp4cols(int sx, int yl, int yh); - void rt_revsubclamp4cols(int sx, int yl, int yh); - void rt_tlate4cols(int sx, int yl, int yh); - void rt_tlateadd4cols(int sx, int yl, int yh); - void rt_tlateaddclamp4cols(int sx, int yl, int yh); - void rt_tlatesubclamp4cols(int sx, int yl, int yh); - void rt_tlaterevsubclamp4cols(int sx, int yl, int yh); - void R_DrawColumnHoriz(); void R_DrawColumn(); void R_DrawFuzzColumn(); void R_DrawTranslatedColumn(); @@ -192,7 +163,6 @@ namespace swrenderer void R_SetupDrawSlab(FSWColormap *base_colormap, float light, int shade); void R_DrawSlab(int dx, fixed_t v, int dy, fixed_t vi, const uint8_t *vptr, uint8_t *p); void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); - void R_FillColumnHoriz(); void R_FillSpan(); void R_DrawWallCol1(); diff --git a/src/r_draw_pal.h b/src/r_draw_pal.h index 205e0cc28..984849b83 100644 --- a/src/r_draw_pal.h +++ b/src/r_draw_pal.h @@ -248,88 +248,4 @@ namespace swrenderer const uint8_t *_colormap; uint8_t *_destorg; }; - - class RtInitColsPalCommand : public DrawerCommand - { - public: - RtInitColsPalCommand(uint8_t *buff); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "RtInitColsPalCommand"; } - - private: - uint8_t *buff; - }; - - class PalColumnHorizCommand : public DrawerCommand - { - public: - PalColumnHorizCommand(); - - protected: - const uint8_t *_source; - fixed_t _iscale; - fixed_t _texturefrac; - int _count; - int _color; - int _x; - int _yl; - }; - - class DrawColumnHorizPalCommand : public PalColumnHorizCommand - { - public: - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "DrawColumnHorizPalCommand"; } - }; - - class FillColumnHorizPalCommand : public PalColumnHorizCommand - { - public: - void Execute(DrawerThread *thread) override; - FString DebugInfo() override { return "FillColumnHorizPalCommand"; } - }; - - class PalRtCommand : public DrawerCommand - { - public: - PalRtCommand(int hx, int sx, int yl, int yh); - FString DebugInfo() override { return "PalRtCommand"; } - - protected: - int hx, sx, yl, yh; - uint8_t *_destorg; - int _pitch; - const uint8_t *_colormap; - const uint32_t *_srcblend; - const uint32_t *_destblend; - const uint8_t *_translation; - fixed_t _srcalpha; - fixed_t _destalpha; - int _color; - }; - - class DrawColumnRt1CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4CopyPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4PalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4TranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4AddTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4ShadedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4AddClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4AddClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4SubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4SubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt1RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - class DrawColumnRt4RevSubClampPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt1RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; - //class DrawColumnRt4RevSubClampTranslatedPalCommand : public PalRtCommand { public: using PalRtCommand::PalRtCommand; void Execute(DrawerThread *thread) override; }; } diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index a9fc5c80e..4b5685608 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -1123,4 +1123,84 @@ namespace swrenderer } #endif + ///////////////////////////////////////////////////////////////////////////// + + namespace + { + static uint32_t particle_texture[16 * 16] = + { + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 8, 2 * 8, 3 * 8, 4 * 8, 5 * 8, 6 * 8, 7 * 8, 8 * 8, 8 * 8, 7 * 8, 6 * 8, 5 * 8, 4 * 8, 3 * 8, 2 * 8, 1 * 8, + 1 * 7, 2 * 7, 3 * 7, 4 * 7, 5 * 7, 6 * 7, 7 * 7, 8 * 7, 8 * 7, 7 * 7, 6 * 7, 5 * 7, 4 * 7, 3 * 7, 2 * 7, 1 * 7, + 1 * 6, 2 * 6, 3 * 6, 4 * 6, 5 * 6, 6 * 6, 7 * 6, 8 * 6, 8 * 6, 7 * 6, 6 * 6, 5 * 6, 4 * 6, 3 * 6, 2 * 6, 1 * 6, + 1 * 5, 2 * 5, 3 * 5, 4 * 5, 5 * 5, 6 * 5, 7 * 5, 8 * 5, 8 * 5, 7 * 5, 6 * 5, 5 * 5, 4 * 5, 3 * 5, 2 * 5, 1 * 5, + 1 * 4, 2 * 4, 3 * 4, 4 * 4, 5 * 4, 6 * 4, 7 * 4, 8 * 4, 8 * 4, 7 * 4, 6 * 4, 5 * 4, 4 * 4, 3 * 4, 2 * 4, 1 * 4, + 1 * 3, 2 * 3, 3 * 3, 4 * 3, 5 * 3, 6 * 3, 7 * 3, 8 * 3, 8 * 3, 7 * 3, 6 * 3, 5 * 3, 4 * 3, 3 * 3, 2 * 3, 1 * 3, + 1 * 2, 2 * 2, 3 * 2, 4 * 2, 5 * 2, 6 * 2, 7 * 2, 8 * 2, 8 * 2, 7 * 2, 6 * 2, 5 * 2, 4 * 2, 3 * 2, 2 * 2, 1 * 2, + 1 * 1, 2 * 1, 3 * 1, 4 * 1, 5 * 1, 6 * 1, 7 * 1, 8 * 1, 8 * 1, 7 * 1, 6 * 1, 5 * 1, 4 * 1, 3 * 1, 2 * 1, 1 * 1 + }; + } + + DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) + { + _dest = dest; + _pitch = pitch; + _count = count; + _fg = fg; + _alpha = alpha; + _fracposx = fracposx; + _dest_y = dest_y; + } + + void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) + { + int count = thread->count_for_thread(_dest_y, _count); + if (count <= 0) + return; + + uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); + int pitch = _pitch * thread->num_cores; + + const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; + uint32_t particle_alpha = _alpha; + + uint32_t fracstep = 16 * FRACUNIT / _count; + uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; + fracstep *= thread->num_cores; + + uint32_t fg_red = (_fg >> 16) & 0xff; + uint32_t fg_green = (_fg >> 8) & 0xff; + uint32_t fg_blue = _fg & 0xff; + + for (int y = 0; y < count; y++) + { + uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; + uint32_t inv_alpha = 256 - alpha; + + uint32_t bg_red = (*dest >> 16) & 0xff; + uint32_t bg_green = (*dest >> 8) & 0xff; + uint32_t bg_blue = (*dest) & 0xff; + + uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; + uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; + uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; + + *dest = 0xff000000 | (red << 16) | (green << 8) | blue; + dest += pitch; + fracpos += fracstep; + } + } + + FString DrawParticleColumnRGBACommand::DebugInfo() + { + return "DrawParticle"; + } + } diff --git a/src/r_draw_rgba.h b/src/r_draw_rgba.h index d5b269106..1364d537b 100644 --- a/src/r_draw_rgba.h +++ b/src/r_draw_rgba.h @@ -361,85 +361,6 @@ namespace swrenderer } }; - class DrawColumnRt1LLVMCommand : public DrawerCommand - { - protected: - DrawColumnArgs args; - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); - DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); - - ///////////////////////////////////////////////////////////////////////////// - - class RtInitColsRGBACommand : public DrawerCommand - { - BYTE * RESTRICT buff; - - public: - RtInitColsRGBACommand(BYTE *buff); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - template - class DrawColumnHorizRGBACommand : public DrawerCommand - { - int _count; - fixed_t _iscale; - fixed_t _texturefrac; - const InputPixelType * RESTRICT _source; - int _x; - int _yl; - int _yh; - - public: - DrawColumnHorizRGBACommand(); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - - class FillColumnHorizRGBACommand : public DrawerCommand - { - int _x; - int _yl; - int _yh; - int _count; - uint32_t _color; - - public: - FillColumnHorizRGBACommand(); - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - ///////////////////////////////////////////////////////////////////////////// class DrawParticleColumnRGBACommand : public DrawerCommand diff --git a/src/r_drawers.cpp b/src/r_drawers.cpp index dd81af109..e61e95183 100644 --- a/src/r_drawers.cpp +++ b/src/r_drawers.cpp @@ -175,30 +175,6 @@ Drawers::Drawers() FillColumnAddClamp = FillColumnAddClamp_SSE2; FillColumnSubClamp = FillColumnSubClamp_SSE2; FillColumnRevSubClamp = FillColumnRevSubClamp_SSE2; - DrawColumnRt1 = DrawColumnRt1_SSE2; - DrawColumnRt1Copy = DrawColumnRt1Copy_SSE2; - DrawColumnRt1Add = DrawColumnRt1Add_SSE2; - DrawColumnRt1Shaded = DrawColumnRt1Shaded_SSE2; - DrawColumnRt1AddClamp = DrawColumnRt1AddClamp_SSE2; - DrawColumnRt1SubClamp = DrawColumnRt1SubClamp_SSE2; - DrawColumnRt1RevSubClamp = DrawColumnRt1RevSubClamp_SSE2; - DrawColumnRt1Translated = DrawColumnRt1Translated_SSE2; - DrawColumnRt1TlatedAdd = DrawColumnRt1TlatedAdd_SSE2; - DrawColumnRt1AddClampTranslated = DrawColumnRt1AddClampTranslated_SSE2; - DrawColumnRt1SubClampTranslated = DrawColumnRt1SubClampTranslated_SSE2; - DrawColumnRt1RevSubClampTranslated = DrawColumnRt1RevSubClampTranslated_SSE2; - DrawColumnRt4 = DrawColumnRt4_SSE2; - DrawColumnRt4Copy = DrawColumnRt4Copy_SSE2; - DrawColumnRt4Add = DrawColumnRt4Add_SSE2; - DrawColumnRt4Shaded = DrawColumnRt4Shaded_SSE2; - DrawColumnRt4AddClamp = DrawColumnRt4AddClamp_SSE2; - DrawColumnRt4SubClamp = DrawColumnRt4SubClamp_SSE2; - DrawColumnRt4RevSubClamp = DrawColumnRt4RevSubClamp_SSE2; - DrawColumnRt4Translated = DrawColumnRt4Translated_SSE2; - DrawColumnRt4TlatedAdd = DrawColumnRt4TlatedAdd_SSE2; - DrawColumnRt4AddClampTranslated = DrawColumnRt4AddClampTranslated_SSE2; - DrawColumnRt4SubClampTranslated = DrawColumnRt4SubClampTranslated_SSE2; - DrawColumnRt4RevSubClampTranslated = DrawColumnRt4RevSubClampTranslated_SSE2; DrawSpan = DrawSpan_SSE2; DrawSpanMasked = DrawSpanMasked_SSE2; DrawSpanTranslucent = DrawSpanTranslucent_SSE2; diff --git a/src/r_drawers.h b/src/r_drawers.h index 2ff2fd087..7a94c82dd 100644 --- a/src/r_drawers.h +++ b/src/r_drawers.h @@ -308,30 +308,6 @@ public: void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt1RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnRt4RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_drawt_pal.cpp b/src/r_drawt_pal.cpp deleted file mode 100644 index dcc0a0938..000000000 --- a/src/r_drawt_pal.cpp +++ /dev/null @@ -1,1064 +0,0 @@ -/* -** r_drawt.cpp -** Faster column drawers for modern processors -** -**--------------------------------------------------------------------------- -** Copyright 1998-2006 Randy Heit -** Copyright 2016 Magnus Norddahl -** Copyright 2016 Rachael Alexanderson -** All rights reserved. -** -** Redistribution and use in source and binary forms, with or without -** modification, are permitted provided that the following conditions -** are met: -** -** 1. Redistributions of source code must retain the above copyright -** notice, this list of conditions and the following disclaimer. -** 2. Redistributions in binary form must reproduce the above copyright -** notice, this list of conditions and the following disclaimer in the -** documentation and/or other materials provided with the distribution. -** 3. The name of the author may not be used to endorse or promote products -** derived from this software without specific prior written permission. -** -** THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR -** IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES -** OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. -** IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, -** INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT -** NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -** DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -** THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -** (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF -** THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -**--------------------------------------------------------------------------- -** -** These functions stretch columns into a temporary buffer and then -** map them to the screen. On modern machines, this is faster than drawing -** them directly to the screen. -** -** Will I be able to even understand any of this if I come back to it later? -** Let's hope so. :-) -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" -#include "r_draw_pal.h" - -EXTERN_CVAR(Bool, r_blendmethod) - -// I should have commented this stuff better. -// -// dc_temp is the buffer R_DrawColumnHoriz writes into. -// dc_tspans points into it. -// dc_ctspan points into dc_tspans. -// horizspan also points into dc_tspans. - -// dc_ctspan is advanced while drawing into dc_temp. -// horizspan is advanced up to dc_ctspan when drawing from dc_temp to the screen. - -namespace swrenderer -{ - RtInitColsPalCommand::RtInitColsPalCommand(uint8_t *buff) : buff(buff) - { - } - - void RtInitColsPalCommand::Execute(DrawerThread *thread) - { - thread->dc_temp = buff == nullptr ? thread->dc_temp_buff : buff; - } - - ///////////////////////////////////////////////////////////////////// - - PalColumnHorizCommand::PalColumnHorizCommand() - { - using namespace drawerargs; - - _source = dc_source; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _count = dc_count; - _color = dc_color; - _x = dc_x; - _yl = dc_yl; - } - - void DrawColumnHorizPalCommand::Execute(DrawerThread *thread) - { - int count = _count; - uint8_t *dest; - fixed_t fracstep; - fixed_t frac; - - count = thread->count_for_thread(_yl, count); - if (count <= 0) - return; - - fracstep = _iscale; - frac = _texturefrac; - - const uint8_t *source = _source; - - int x = _x & 3; - dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; - frac += fracstep * thread->skipped_by_thread(_yl); - fracstep *= thread->num_cores; - - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } - - void FillColumnHorizPalCommand::Execute(DrawerThread *thread) - { - int count = _count; - uint8_t color = _color; - uint8_t *dest; - - count = thread->count_for_thread(_yl, count); - if (count <= 0) - return; - - int x = _x & 3; - dest = &thread->dc_temp[x + thread->temp_line_for_thread(_yl) * 4]; - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); - } - - ///////////////////////////////////////////////////////////////////// - - PalRtCommand::PalRtCommand(int hx, int sx, int yl, int yh) : hx(hx), sx(sx), yl(yl), yh(yh) - { - using namespace drawerargs; - - _destorg = dc_destorg; - _pitch = dc_pitch; - _colormap = dc_colormap; - _srcblend = dc_srcblend; - _destblend = dc_destblend; - _srcalpha = dc_srcalpha; - _destalpha = dc_destalpha; - _translation = dc_translation; - _color = dc_color; - } - - void DrawColumnRt1CopyPalCommand::Execute(DrawerThread *thread) - { - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - - if (count & 1) { - *dest = *source; - source += 4; - dest += pitch; - } - if (count & 2) { - dest[0] = source[0]; - dest[pitch] = source[4]; - source += 8; - dest += pitch*2; - } - if (!(count >>= 2)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4]; - dest[pitch*2] = source[8]; - dest[pitch*3] = source[12]; - source += 16; - dest += pitch*4; - } while (--count); - } - - void DrawColumnRt4CopyPalCommand::Execute(DrawerThread *thread) - { - int *source; - int *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = (int *)(ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg); - source = (int *)(&thread->dc_temp[thread->temp_line_for_thread(yl)*4]); - pitch = _pitch*thread->num_cores/sizeof(int); - - if (count & 1) { - *dest = *source; - source += 4/sizeof(int); - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = source[0]; - dest[pitch] = source[4/sizeof(int)]; - source += 8/sizeof(int); - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt1PalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl) *4 + hx]; - pitch = _pitch*thread->num_cores; - - if (count & 1) { - *dest = colormap[*source]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[pitch] = colormap[source[4]]; - source += 8; - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt4PalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int count; - int pitch; - - count = yh - yl + 1; - - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch*thread->num_cores; - - if (count & 1) { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - source += 4; - dest += pitch; - } - if (!(count >>= 1)) - return; - - do { - dest[0] = colormap[source[0]]; - dest[1] = colormap[source[1]]; - dest[2] = colormap[source[2]]; - dest[3] = colormap[source[3]]; - dest[pitch] = colormap[source[4]]; - dest[pitch+1] = colormap[source[5]]; - dest[pitch+2] = colormap[source[6]]; - dest[pitch+3] = colormap[source[7]]; - source += 8; - dest += pitch*2; - } while (--count); - } - - void DrawColumnRt1TranslatedPalCommand::Execute(DrawerThread *thread) - { - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - const uint8_t *translation = _translation; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. - for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - uint8_t b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } - } - - void DrawColumnRt4TranslatedPalCommand::Execute(DrawerThread *thread) - { - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - uint8_t *source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - const uint8_t *translation = _translation; - int c0, c1; - uint8_t b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } - } - - void DrawColumnRt1AddPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t fg = colormap[*source]; - uint32_t bg = *dest; - - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - *dest = RGB32k.All[fg & (fg>>15)]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t fg = colormap[*source]; - uint32_t bg = *dest; - - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4AddPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t fg = colormap[source[0]]; - uint32_t bg = dest[0]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[0] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[1]]; - bg = dest[1]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[1] = RGB32k.All[fg & (fg>>15)]; - - - fg = colormap[source[2]]; - bg = dest[2]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[2] = RGB32k.All[fg & (fg>>15)]; - - fg = colormap[source[3]]; - bg = dest[3]; - fg = fg2rgb[fg]; - bg = bg2rgb[bg]; - fg = (fg+bg) | 0x1f07c1f; - dest[3] = RGB32k.All[fg & (fg>>15)]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { // [SP] this 4col function was a block of copy-pasted code. 4 times. I regret nothing. - uint32_t fg = colormap[source[ks]]; - uint32_t bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1ShadedPalCommand::Execute(DrawerThread *thread) - { - uint32_t *fgstart; - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - fgstart = &Col2RGB8[0][_color]; - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t val = colormap[*source]; - uint32_t fg = fgstart[val<<8]; - val = (Col2RGB8[64-val][*dest] + fg) | 0x1f07c1f; - *dest = RGB32k.All[val & (val>>15)]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t val = *source; - int r = (palette[*dest].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[*dest].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[*dest].b * (255-val) + palette[_color].b * val) >> 10; - *dest = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4ShadedPalCommand::Execute(DrawerThread *thread) - { - uint32_t *fgstart; - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - fgstart = &Col2RGB8[0][_color]; - colormap = _colormap; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t val; - - val = colormap[source[0]]; - val = (Col2RGB8[64-val][dest[0]] + fgstart[val<<8]) | 0x1f07c1f; - dest[0] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[1]]; - val = (Col2RGB8[64-val][dest[1]] + fgstart[val<<8]) | 0x1f07c1f; - dest[1] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[2]]; - val = (Col2RGB8[64-val][dest[2]] + fgstart[val<<8]) | 0x1f07c1f; - dest[2] = RGB32k.All[val & (val>>15)]; - - val = colormap[source[3]]; - val = (Col2RGB8[64-val][dest[3]] + fgstart[val<<8]) | 0x1f07c1f; - dest[3] = RGB32k.All[val & (val>>15)]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - uint32_t val; - - for (int ks = 0; ks < 4; ks++) - { - val = source[ks]; - int r = (palette[dest[ks]].r * (255-val) + palette[_color].r * val) >> 10; - int g = (palette[dest[ks]].g * (255-val) + palette[_color].g * val) >> 10; - int b = (palette[dest[ks]].b * (255-val) + palette[_color].b * val) >> 10; - dest[ks] = RGB256k.RGB[clamp(r,0,63)][clamp(g,0,63)][clamp(b,0,63)]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1AddClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = fg2rgb[colormap[*source]] + bg2rgb[*dest]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4AddClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - - if (!r_blendmethod) - { - do { - uint32_t a = fg2rgb[colormap[source[0]]] + bg2rgb[dest[0]]; - uint32_t b = a; - - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[1]]] + bg2rgb[dest[1]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[2]]] + bg2rgb[dest[2]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = fg2rgb[colormap[source[3]]] + bg2rgb[dest[3]]; - b = a; - a |= 0x01f07c1f; - b &= 0x40100400; - a &= 0x3fffffff; - b = b - (b >> 5); - a |= b; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MIN((palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 63); - int g = MIN((palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 63); - int b = MIN((palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 63); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1SubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (fg2rgb[colormap[*source]] | 0x40100400) - bg2rgb[*dest]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4SubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (fg2rgb[colormap[source[0]]] | 0x40100400) - bg2rgb[dest[0]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[1]]] | 0x40100400) - bg2rgb[dest[1]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[2]]] | 0x40100400) - bg2rgb[dest[2]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (fg2rgb[colormap[source[3]]] | 0x40100400) - bg2rgb[dest[3]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((palette[fg].r * _srcalpha - palette[bg].r * _destalpha)>>18, 0); - int g = MAX((palette[fg].g * _srcalpha - palette[bg].g * _destalpha)>>18, 0); - int b = MAX((palette[fg].b * _srcalpha - palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt1RevSubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4 + hx]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (bg2rgb[*dest] | 0x40100400) - fg2rgb[colormap[*source]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - *dest = RGB32k.All[(a>>15) & a]; - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - int fg = colormap[*source]; - int bg = *dest; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - *dest = RGB256k.RGB[r][g][b]; - source += 4; - dest += pitch; - } while (--count); - } - } - - void DrawColumnRt4RevSubClampPalCommand::Execute(DrawerThread *thread) - { - const uint8_t *colormap; - uint8_t *source; - uint8_t *dest; - int pitch; - - int count = yh - yl + 1; - count = thread->count_for_thread(yl, count); - if (count <= 0) - return; - - const uint32_t *fg2rgb = _srcblend; - const uint32_t *bg2rgb = _destblend; - dest = ylookup[yl + thread->skipped_by_thread(yl)] + sx + _destorg; - source = &thread->dc_temp[thread->temp_line_for_thread(yl)*4]; - pitch = _pitch * thread->num_cores; - colormap = _colormap; - const PalEntry *palette = GPalette.BaseColors; - - if (!r_blendmethod) - { - do { - uint32_t a = (bg2rgb[dest[0]] | 0x40100400) - fg2rgb[colormap[source[0]]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[0] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[1]] | 0x40100400) - fg2rgb[colormap[source[1]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[1] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[2]] | 0x40100400) - fg2rgb[colormap[source[2]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[2] = RGB32k.All[(a>>15) & a]; - - a = (bg2rgb[dest[3]] | 0x40100400) - fg2rgb[colormap[source[3]]]; - b = a; - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[3] = RGB32k.All[(a>>15) & a]; - - source += 4; - dest += pitch; - } while (--count); - } - else - { - do { - for (int ks = 0; ks < 4; ks++) - { - int fg = colormap[source[ks]]; - int bg = dest[ks]; - int r = MAX((-palette[fg].r * _srcalpha + palette[bg].r * _destalpha)>>18, 0); - int g = MAX((-palette[fg].g * _srcalpha + palette[bg].g * _destalpha)>>18, 0); - int b = MAX((-palette[fg].b * _srcalpha + palette[bg].b * _destalpha)>>18, 0); - dest[ks] = RGB256k.RGB[r][g][b]; - } - - source += 4; - dest += pitch; - } while (--count); - } - } -} diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp deleted file mode 100644 index b5be7a9c3..000000000 --- a/src/r_drawt_rgba.cpp +++ /dev/null @@ -1,314 +0,0 @@ -/* -** Drawer commands for the RT family of drawers -** Copyright (c) 2016 Magnus Norddahl -** -** This software is provided 'as-is', without any express or implied -** warranty. In no event will the authors be held liable for any damages -** arising from the use of this software. -** -** Permission is granted to anyone to use this software for any purpose, -** including commercial applications, and to alter it and redistribute it -** freely, subject to the following restrictions: -** -** 1. The origin of this software must not be misrepresented; you must not -** claim that you wrote the original software. If you use this software -** in a product, an acknowledgment in the product documentation would be -** appreciated but is not required. -** 2. Altered source versions must be plainly marked as such, and must not be -** misrepresented as being the original software. -** 3. This notice may not be removed or altered from any source distribution. -** -*/ - -#include "templates.h" -#include "doomtype.h" -#include "doomdef.h" -#include "r_defs.h" -#include "r_draw.h" -#include "r_main.h" -#include "r_things.h" -#include "v_video.h" -#include "r_draw_rgba.h" -#include "r_drawers.h" - -namespace swrenderer -{ - WorkerThreadData DrawColumnRt1LLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - d.temp = thread->dc_temp_rgba; - return d; - } - - DrawColumnRt1LLVMCommand::DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) - { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; - args.source = nullptr; - args.source2 = nullptr; - args.colormap = dc_colormap; - args.translation = dc_translation; - args.basecolors = (const uint32_t *)GPalette.BaseColors; - args.pitch = dc_pitch; - args.count = yh - yl + 1; - args.dest_y = yl; - args.iscale = dc_iscale; - args.texturefrac = hx; - args.light = LightBgra::calc_light_multiplier(dc_light); - args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); - args.srccolor = dc_srccolor_bgra; - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); - args.light_red = dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; - args.flags = 0; - if (dc_shade_constants.simple_shade) - args.flags |= DrawColumnArgs::simple_shade; - if (args.source2 == nullptr) - args.flags |= DrawColumnArgs::nearest_filter; - - DetectRangeError(args.dest, args.dest_y, args.count); - } - - void DrawColumnRt1LLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = ThreadData(thread); - Drawers::Instance()->DrawColumnRt1(&args, &d); - } - - FString DrawColumnRt1LLVMCommand::DebugInfo() - { - return "DrawColumnRt\n" + args.ToString(); - } - - ///////////////////////////////////////////////////////////////////////////// - - RtInitColsRGBACommand::RtInitColsRGBACommand(BYTE *buff) - { - this->buff = buff; - } - - void RtInitColsRGBACommand::Execute(DrawerThread *thread) - { - thread->dc_temp_rgba = buff == NULL ? thread->dc_temp_rgbabuff_rgba : (uint32_t*)buff; - } - - FString RtInitColsRGBACommand::DebugInfo() - { - return "RtInitCols"; - } - - ///////////////////////////////////////////////////////////////////////////// - - template - DrawColumnHorizRGBACommand::DrawColumnHorizRGBACommand() - { - using namespace drawerargs; - - _count = dc_count; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _source = (const InputPixelType *)dc_source; - _x = dc_x; - _yl = dc_yl; - _yh = dc_yh; - } - - template - void DrawColumnHorizRGBACommand::Execute(DrawerThread *thread) - { - int count = _count; - uint32_t *dest; - fixed_t fracstep; - fixed_t frac; - - if (count <= 0) - return; - - { - int x = _x & 3; - dest = &thread->dc_temp_rgba[x + 4 * _yl]; - } - fracstep = _iscale; - frac = _texturefrac; - - const InputPixelType *source = _source; - - if (count & 1) { - *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; - } - if (count & 2) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest += 8; - } - if (count & 4) { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest += 16; - } - count >>= 3; - if (!count) return; - - do - { - dest[0] = source[frac >> FRACBITS]; frac += fracstep; - dest[4] = source[frac >> FRACBITS]; frac += fracstep; - dest[8] = source[frac >> FRACBITS]; frac += fracstep; - dest[12] = source[frac >> FRACBITS]; frac += fracstep; - dest[16] = source[frac >> FRACBITS]; frac += fracstep; - dest[20] = source[frac >> FRACBITS]; frac += fracstep; - dest[24] = source[frac >> FRACBITS]; frac += fracstep; - dest[28] = source[frac >> FRACBITS]; frac += fracstep; - dest += 32; - } while (--count); - } - - template - FString DrawColumnHorizRGBACommand::DebugInfo() - { - return "DrawColumnHoriz"; - } - - // Generate code for the versions we use: - template class DrawColumnHorizRGBACommand; - template class DrawColumnHorizRGBACommand; - - ///////////////////////////////////////////////////////////////////////////// - - FillColumnHorizRGBACommand::FillColumnHorizRGBACommand() - { - using namespace drawerargs; - - _x = dc_x; - _count = dc_count; - _color = GPalette.BaseColors[dc_color].d | (uint32_t)0xff000000; - _yl = dc_yl; - _yh = dc_yh; - } - - void FillColumnHorizRGBACommand::Execute(DrawerThread *thread) - { - int count = _count; - uint32_t color = _color; - uint32_t *dest; - - if (count <= 0) - return; - - { - int x = _x & 3; - dest = &thread->dc_temp_rgba[x + 4 * _yl]; - } - - if (count & 1) { - *dest = color; - dest += 4; - } - if (!(count >>= 1)) - return; - do { - dest[0] = color; dest[4] = color; - dest += 8; - } while (--count); - } - - FString FillColumnHorizRGBACommand::DebugInfo() - { - return "FillColumnHoriz"; - } - - ///////////////////////////////////////////////////////////////////////////// - - namespace - { - static uint32_t particle_texture[16 * 16] = - { - 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1, - 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, - 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, - 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, - 1*5, 2*5, 3*5, 4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, - 1*6, 2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, - 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, - 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, - 1*8, 2*8, 3*8, 4*8, 5*8, 6*8, 7*8, 8*8, 8*8, 7*8, 6*8, 5*8, 4*8, 3*8, 2*8, 1*8, - 1*7, 2*7, 3*7, 4*7, 5*7, 6*7, 7*7, 8*7, 8*7, 7*7, 6*7, 5*7, 4*7, 3*7, 2*7, 1*7, - 1*6, 2*6, 3*6, 4*6, 5*6, 6*6, 7*6, 8*6, 8*6, 7*6, 6*6, 5*6, 4*6, 3*6, 2*6, 1*6, - 1*5, 2*5, 3*5, 4*5, 5*5, 6*5, 7*5, 8*5, 8*5, 7*5, 6*5, 5*5, 4*5, 3*5, 2*5, 1*5, - 1*4, 2*4, 3*4, 4*4, 5*4, 6*4, 7*4, 8*4, 8*4, 7*4, 6*4, 5*4, 4*4, 3*4, 2*4, 1*4, - 1*3, 2*3, 3*3, 4*3, 5*3, 6*3, 7*3, 8*3, 8*3, 7*3, 6*3, 5*3, 4*3, 3*3, 2*3, 1*3, - 1*2, 2*2, 3*2, 4*2, 5*2, 6*2, 7*2, 8*2, 8*2, 7*2, 6*2, 5*2, 4*2, 3*2, 2*2, 1*2, - 1*1, 2*1, 3*1, 4*1, 5*1, 6*1, 7*1, 8*1, 8*1, 7*1, 6*1, 5*1, 4*1, 3*1, 2*1, 1*1 - }; - } - - DrawParticleColumnRGBACommand::DrawParticleColumnRGBACommand(uint32_t *dest, int dest_y, int pitch, int count, uint32_t fg, uint32_t alpha, uint32_t fracposx) - { - _dest = dest; - _pitch = pitch; - _count = count; - _fg = fg; - _alpha = alpha; - _fracposx = fracposx; - _dest_y = dest_y; - } - - void DrawParticleColumnRGBACommand::Execute(DrawerThread *thread) - { - int count = thread->count_for_thread(_dest_y, _count); - if (count <= 0) - return; - - uint32_t *dest = thread->dest_for_thread(_dest_y, _pitch, _dest); - int pitch = _pitch * thread->num_cores; - - const uint32_t *source = &particle_texture[(_fracposx >> FRACBITS) * 16]; - uint32_t particle_alpha = _alpha; - - uint32_t fracstep = 16 * FRACUNIT / _count; - uint32_t fracpos = fracstep * thread->skipped_by_thread(_dest_y) + fracstep / 2; - fracstep *= thread->num_cores; - - uint32_t fg_red = (_fg >> 16) & 0xff; - uint32_t fg_green = (_fg >> 8) & 0xff; - uint32_t fg_blue = _fg & 0xff; - - for (int y = 0; y < count; y++) - { - uint32_t alpha = (source[fracpos >> FRACBITS] * particle_alpha) >> 6; - uint32_t inv_alpha = 256 - alpha; - - uint32_t bg_red = (*dest >> 16) & 0xff; - uint32_t bg_green = (*dest >> 8) & 0xff; - uint32_t bg_blue = (*dest) & 0xff; - - uint32_t red = (fg_red * alpha + bg_red * inv_alpha) / 256; - uint32_t green = (fg_green * alpha + bg_green * inv_alpha) / 256; - uint32_t blue = (fg_blue * alpha + bg_blue * inv_alpha) / 256; - - *dest = 0xff000000 | (red << 16) | (green << 8) | blue; - dest += pitch; - fracpos += fracstep; - } - } - - FString DrawParticleColumnRGBACommand::DebugInfo() - { - return "DrawParticle"; - } -} diff --git a/src/r_main.cpp b/src/r_main.cpp index c67b48a1b..a83b2f190 100644 --- a/src/r_main.cpp +++ b/src/r_main.cpp @@ -169,11 +169,6 @@ void (*fuzzcolfunc) (void); void (*transcolfunc) (void); void (*spanfunc) (void); -void (*hcolfunc_pre) (void); -void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -void (*hcolfunc_post4) (int sx, int yl, int yh); - cycle_t WallCycles, PlaneCycles, MaskedCycles, WallScanCycles; // PRIVATE DATA DEFINITIONS ------------------------------------------------ @@ -853,17 +848,11 @@ void R_RenderActorView (AActor *actor, bool dontmaplines) // [RH] Show off segs if r_drawflat is 1 if (r_drawflat) { - hcolfunc_pre = R_FillColumnHoriz; - hcolfunc_post1 = rt_copy1col; - hcolfunc_post4 = rt_copy4cols; colfunc = R_FillColumn; spanfunc = R_FillSpan; } else { - hcolfunc_pre = R_DrawColumnHoriz; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; colfunc = basecolfunc; spanfunc = R_DrawSpan; } diff --git a/src/r_main.h b/src/r_main.h index 37ead76ce..daf60c16d 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -124,12 +124,6 @@ extern void (*transcolfunc) (void); // No shadow effects on floors. extern void (*spanfunc) (void); -// [RH] Function pointers for the horizontal column drawers. -extern void (*hcolfunc_pre) (void); -extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); -extern void (*hcolfunc_post4) (int sx, int yl, int yh); - void R_InitTextureMapping (); diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp index 72851b6b3..5da23297b 100644 --- a/src/r_poly_triangle.cpp +++ b/src/r_poly_triangle.cpp @@ -377,7 +377,6 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread) thread_data.num_cores = thread->num_cores; thread_data.pass_start_y = thread->pass_start_y; thread_data.pass_end_y = thread->pass_end_y; - thread_data.temp = thread->dc_temp_rgba; thread_data.FullSpans = thread->FullSpansBuffer.data(); thread_data.PartialBlocks = thread->PartialBlocksBuffer.data(); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index edad2aecc..a2d02c3f9 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -2323,8 +2323,6 @@ static void R_RenderDecal (side_t *wall, DBaseDecal *decal, drawseg_t *clipper, } while (needrepeat--); colfunc = basecolfunc; - hcolfunc_post1 = rt_map1col; - hcolfunc_post4 = rt_map4cols; R_FinishSetPatchStyle (); done: diff --git a/src/r_things.cpp b/src/r_things.cpp index 22d6b7ac6..7b945a44c 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -366,18 +366,12 @@ void R_DrawMaskedColumnBgra(FTexture *tex, fixed_t col, bool useRt, bool unmaske double v = ((dc_yl + 0.5 - sprtopscreen) / spryscale) / tex->GetHeight(); dc_texturefrac = (uint32_t)(v * (1 << 30)); - if (useRt) - hcolfunc_pre(); - else - colfunc(); + colfunc(); } span++; } dc_iscale = saved_iscale; - - if (sprflipvert && useRt) - rt_flip_posts(); } void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) @@ -447,16 +441,10 @@ void R_DrawMaskedColumn (FTexture *tex, fixed_t col, bool useRt, bool unmasked) else if (dc_iscale < 0) dc_count = MIN(dc_count, (dc_texturefrac - dc_iscale) / (-dc_iscale)); - if (useRt) - hcolfunc_pre(); - else - colfunc (); + colfunc (); } span++; } - - if (sprflipvert && useRt) - rt_flip_posts(); } // [ZZ] @@ -705,6 +693,7 @@ void R_WallSpriteColumn (bool useRt) void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop, short *clipbot) { +#if 0 int flags = 0; // Do setup for blending. @@ -772,6 +761,7 @@ void R_DrawVisVoxel(vissprite_t *spr, int minslabz, int maxslabz, short *cliptop R_FinishSetPatchStyle(); NetUpdate(); +#endif } // @@ -2857,6 +2847,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, fixed_t daxscale, fixed_t dayscale, FVoxel *voxobj, FSWColormap *colormap, int colormapnum, short *daumost, short *dadmost, int minslabz, int maxslabz, int flags) { +#if 0 int i, j, k, x, y, syoff, ggxstart, ggystart, nxoff; fixed_t cosang, sinang, sprcosang, sprsinang; int backx, backy, gxinc, gyinc; @@ -3184,6 +3175,7 @@ void R_DrawVoxel(const FVector3 &globalpos, FAngle viewangle, } } } +#endif } //========================================================================== diff --git a/src/r_thread.h b/src/r_thread.h index ea1ceaa29..c3e818abb 100644 --- a/src/r_thread.h +++ b/src/r_thread.h @@ -45,9 +45,6 @@ class DrawerThread public: DrawerThread() { - dc_temp = dc_temp_buff; - dc_temp_rgba = dc_temp_rgbabuff_rgba; - FullSpansBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); PartialBlocksBuffer.resize(MAXWIDTH / 8 * (MAXHEIGHT / 8)); } @@ -64,14 +61,6 @@ public: int pass_start_y = 0; int pass_end_y = MAXHEIGHT; - // Working buffer used by Rt drawers - uint8_t dc_temp_buff[MAXHEIGHT * 4]; - uint8_t *dc_temp = nullptr; - - // Working buffer used by Rt drawers, true color edition - uint32_t dc_temp_rgbabuff_rgba[MAXHEIGHT * 4]; - uint32_t *dc_temp_rgba = nullptr; - // Working buffer used by the tilted (sloped) span drawer const uint8_t *tiltlighting[MAXWIDTH]; diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp index 177074dad..6e00528ae 100644 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.cpp +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.cpp @@ -32,7 +32,7 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); source = args[0][1].load(true); @@ -43,12 +43,9 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met pitch = args[0][6].load(true); count = args[0][7].load(true); dest_y = args[0][8].load(true); - if (method == DrawColumnMethod::Normal) - { - iscale = args[0][9].load(true); - texturefracx = args[0][10].load(true); - textureheight = args[0][11].load(true); - } + iscale = args[0][9].load(true); + texturefracx = args[0][10].load(true); + textureheight = args[0][11].load(true); texturefrac = args[0][12].load(true); light = args[0][13].load(true); color = SSAVec4i::unpack(args[0][14].load(true)); @@ -81,49 +78,32 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod met count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); pitch = pitch * thread.num_cores; - if (method == DrawColumnMethod::Normal) - { - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - iscale = iscale * thread.num_cores; - one = (1 << 30) / textureheight; - SSAIfBlock branch; - branch.if_block(is_simple_shade); - LoopShade(variant, method, true); - branch.else_block(); - LoopShade(variant, method, false); - branch.end_block(); - } - else - { - source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + one = (1 << 30) / textureheight; - SSAIfBlock branch; - branch.if_block(is_simple_shade); - Loop(variant, method, true, true); - branch.else_block(); - Loop(variant, method, false, true); - branch.end_block(); - } -} - -void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) -{ SSAIfBlock branch; - branch.if_block(is_nearest_filter); - Loop(variant, method, isSimpleShade, true); + branch.if_block(is_simple_shade); + LoopShade(variant, true); branch.else_block(); - stack_frac.store(stack_frac.load() - (one >> 1)); - Loop(variant, method, isSimpleShade, false); + LoopShade(variant, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) +void DrawColumnCodegen::LoopShade(DrawColumnVariant variant, bool isSimpleShade) { - SSAInt sincr; - if (method != DrawColumnMethod::Normal) - sincr = thread.num_cores * 4; + SSAIfBlock branch; + branch.if_block(is_nearest_filter); + Loop(variant, isSimpleShade, true); + branch.else_block(); + stack_frac.store(stack_frac.load() - (one >> 1)); + Loop(variant, isSimpleShade, false); + branch.end_block(); +} +void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) +{ stack_index.store(SSAInt(0)); { SSAForBlock loop; @@ -131,56 +111,21 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, loop.loop_block(index < count); SSAInt sample_index, frac; - if (method == DrawColumnMethod::Normal) - { - frac = stack_frac.load(); - if (IsPaletteInput(variant)) - sample_index = frac >> FRACBITS; - else - sample_index = frac; - } + frac = stack_frac.load(); + if (IsPaletteInput(variant)) + sample_index = frac >> FRACBITS; else - { - sample_index = index * sincr * 4; - } + sample_index = frac; SSAInt offset = index * pitch * 4; - SSAVec4i bgcolor[4]; + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - int numColumns = (method == DrawColumnMethod::Rt4) ? 4 : 1; + SSAVec4i outcolor = ProcessPixel(sample_index, bgcolor, variant, isSimpleShade, isNearestFilter); - if (numColumns == 4) - { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); - SSAVec8s bg0 = SSAVec8s::extendlo(bg); - SSAVec8s bg1 = SSAVec8s::extendhi(bg); - bgcolor[0] = SSAVec4i::extendlo(bg0); - bgcolor[1] = SSAVec4i::extendhi(bg0); - bgcolor[2] = SSAVec4i::extendlo(bg1); - bgcolor[3] = SSAVec4i::extendhi(bg1); - } - else - { - bgcolor[0] = dest[offset].load_vec4ub(false); - } - - SSAVec4i outcolor[4]; - for (int i = 0; i < numColumns; i++) - outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, method, isSimpleShade, isNearestFilter); - - if (numColumns == 4) - { - SSAVec16ub packedcolor(SSAVec8s(outcolor[0], outcolor[1]), SSAVec8s(outcolor[2], outcolor[3])); - dest[offset].store_unaligned_vec16ub(packedcolor); - } - else - { - dest[offset].store_vec4ub(outcolor[0]); - } + dest[offset].store_vec4ub(outcolor); stack_index.store(index.add(SSAInt(1), true, true)); - if (method == DrawColumnMethod::Normal) - stack_frac.store(frac + iscale); + stack_frac.store(frac + iscale); loop.end_block(); } } @@ -212,7 +157,7 @@ bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) } } -SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter) { SSAInt alpha, inv_alpha; SSAVec4i fg; @@ -220,22 +165,22 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(Sample(sample_index, method, isNearestFilter)); + return blend_copy(Sample(sample_index, isNearestFilter)); case DrawColumnVariant::Draw: - return blend_copy(Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade)); + return blend_copy(Shade(Sample(sample_index, isNearestFilter), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_add(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_sub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawRevSubClamp: - fg = Shade(Sample(sample_index, method, isNearestFilter), isSimpleShade); + fg = Shade(Sample(sample_index, isNearestFilter), isSimpleShade); return blend_revsub(fg, bgcolor, srcalpha, calc_blend_bgalpha(fg, destalpha)); case DrawColumnVariant::DrawTranslated: return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); @@ -311,23 +256,16 @@ SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolo } } -SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter) +SSAVec4i DrawColumnCodegen::Sample(SSAInt frac, bool isNearestFilter) { - if (method == DrawColumnMethod::Normal) + if (isNearestFilter) { - if (isNearestFilter) - { - SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; - return source[sample_index * 4].load_vec4ub(false); - } - else - { - return SampleLinear(source, source2, texturefracx, frac, one, textureheight); - } + SSAInt sample_index = (((frac << 2) >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index * 4].load_vec4ub(false); } else { - return source[frac].load_vec4ub(true); + return SampleLinear(source, source2, texturefracx, frac, one, textureheight); } } diff --git a/tools/drawergen/fixedfunction/drawcolumncodegen.h b/tools/drawergen/fixedfunction/drawcolumncodegen.h index 2c44edc5c..905654911 100644 --- a/tools/drawergen/fixedfunction/drawcolumncodegen.h +++ b/tools/drawergen/fixedfunction/drawcolumncodegen.h @@ -45,24 +45,17 @@ enum class DrawColumnVariant DrawRevSubClampTranslated }; -enum class DrawColumnMethod -{ - Normal, - Rt1, - Rt4 -}; - class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); + void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); private: - void LoopShade(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); - void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); - SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade, bool isNearestFilter); + void LoopShade(DrawColumnVariant variant, bool isSimpleShade); + void Loop(DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade, bool isNearestFilter); SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); - SSAVec4i Sample(SSAInt frac, DrawColumnMethod method, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAInt ColormapSample(SSAInt frac); SSAVec4i TranslateSample(SSAInt frac); diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 5c3bf05a4..099919997 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -29,46 +29,22 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, { mProgram.CreateModule(); - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); - CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); - CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); - CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -102,7 +78,7 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, ObjectFile = mProgram.GenerateObjectFile(triple, cpuName, features); } -void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) +void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -113,7 +89,7 @@ void LLVMDrawers::CodegenDrawColumn(const char *name, DrawColumnVariant variant, function.create_public(); DrawColumnCodegen codegen; - codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index f546d1dff..a7f1c8625 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -47,7 +47,7 @@ public: std::vector ObjectFile; private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); + void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns);