From 49903af394a3b0215b06e56e855c8db29cb510f4 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Tue, 27 Dec 2016 07:18:04 +0100 Subject: [PATCH] Remove 4 column wall drawers --- src/swrenderer/drawers/r_draw.cpp | 94 +-- src/swrenderer/drawers/r_draw.h | 26 +- src/swrenderer/drawers/r_draw_pal.cpp | 672 ------------------ src/swrenderer/drawers/r_draw_pal.h | 27 - src/swrenderer/drawers/r_draw_rgba.cpp | 66 -- src/swrenderer/drawers/r_draw_rgba.h | 21 - src/swrenderer/drawers/r_drawers.cpp | 12 - src/swrenderer/drawers/r_drawers.h | 8 - src/swrenderer/scene/r_plane.cpp | 83 +-- src/swrenderer/scene/r_walldraw.cpp | 246 +------ .../fixedfunction/drawskycodegen.cpp | 67 +- .../drawergen/fixedfunction/drawskycodegen.h | 18 +- .../fixedfunction/drawwallcodegen.cpp | 116 +-- .../drawergen/fixedfunction/drawwallcodegen.h | 30 +- tools/drawergen/llvmdrawers.cpp | 32 +- tools/drawergen/llvmdrawers.h | 4 +- 16 files changed, 119 insertions(+), 1403 deletions(-) diff --git a/src/swrenderer/drawers/r_draw.cpp b/src/swrenderer/drawers/r_draw.cpp index 626098245..b49427d98 100644 --- a/src/swrenderer/drawers/r_draw.cpp +++ b/src/swrenderer/drawers/r_draw.cpp @@ -496,30 +496,26 @@ namespace swrenderer return tex->GetColumn(col, nullptr); } - bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)()) + bool R_GetTransMaskDrawers(void(**drawColumn)()) { if (colfunc == R_DrawAddColumn) { - *drawCol1 = R_DrawWallAddCol1; - *drawCol4 = R_DrawWallAddCol4; + *drawColumn = R_DrawWallAddColumn; return true; } if (colfunc == R_DrawAddClampColumn) { - *drawCol1 = R_DrawWallAddClampCol1; - *drawCol4 = R_DrawWallAddClampCol4; + *drawColumn = R_DrawWallAddClampColumn; return true; } if (colfunc == R_DrawSubClampColumn) { - *drawCol1 = R_DrawWallSubClampCol1; - *drawCol4 = R_DrawWallSubClampCol4; + *drawColumn = R_DrawWallSubClampColumn; return true; } if (colfunc == R_DrawRevSubClampColumn) { - *drawCol1 = R_DrawWallRevSubClampCol1; - *drawCol4 = R_DrawWallRevSubClampCol4; + *drawColumn = R_DrawWallRevSubClampColumn; return true; } return false; @@ -637,7 +633,7 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////// - void R_DrawWallCol1() + void R_DrawWallColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -645,15 +641,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallMaskedCol1() + void R_DrawWallMaskedColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -661,15 +649,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallMaskedCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddCol1() + void R_DrawWallAddColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -679,15 +659,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallAddCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallAddClampCol1() + void R_DrawWallAddClampColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -695,15 +667,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallAddClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallSubClampCol1() + void R_DrawWallSubClampColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -711,15 +675,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallSubClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawWallRevSubClampCol1() + void R_DrawWallRevSubClampColumn() { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(); @@ -727,15 +683,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(); } - void R_DrawWallRevSubClampCol4() - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(); - else - DrawerCommandQueue::QueueCommand(); - } - - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); @@ -743,15 +691,7 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom) + void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom) { if (r_swtruecolor) DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); @@ -759,14 +699,6 @@ namespace swrenderer DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); } - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom) - { - if (r_swtruecolor) - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - else - DrawerCommandQueue::QueueCommand(solid_top, solid_bottom); - } - void R_DrawColumn() { if (r_swtruecolor) diff --git a/src/swrenderer/drawers/r_draw.h b/src/swrenderer/drawers/r_draw.h index c508aa268..580bcd2f9 100644 --- a/src/swrenderer/drawers/r_draw.h +++ b/src/swrenderer/drawers/r_draw.h @@ -130,7 +130,7 @@ namespace swrenderer bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color); bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color); void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade - bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)()); + bool R_GetTransMaskDrawers(void(**drawColumn)()); const uint8_t *R_GetColumn(FTexture *tex, int col); @@ -165,23 +165,15 @@ namespace swrenderer void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip); void R_FillSpan(); - void R_DrawWallCol1(); - void R_DrawWallCol4(); - void R_DrawWallMaskedCol1(); - void R_DrawWallMaskedCol4(); - void R_DrawWallAddCol1(); - void R_DrawWallAddCol4(); - void R_DrawWallAddClampCol1(); - void R_DrawWallAddClampCol4(); - void R_DrawWallSubClampCol1(); - void R_DrawWallSubClampCol4(); - void R_DrawWallRevSubClampCol1(); - void R_DrawWallRevSubClampCol4(); + void R_DrawWallColumn(); + void R_DrawWallMaskedColumn(); + void R_DrawWallAddColumn(); + void R_DrawWallAddClampColumn(); + void R_DrawWallSubClampColumn(); + void R_DrawWallRevSubClampColumn(); - void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom); - void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); + void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom); // Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color) void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade); diff --git a/src/swrenderer/drawers/r_draw_pal.cpp b/src/swrenderer/drawers/r_draw_pal.cpp index c8adf3965..dd1fdcc52 100644 --- a/src/swrenderer/drawers/r_draw_pal.cpp +++ b/src/swrenderer/drawers/r_draw_pal.cpp @@ -110,25 +110,6 @@ namespace swrenderer _step_viewpos_z = dc_viewpos_step.Z; } - PalWall4Command::PalWall4Command() - { - using namespace drawerargs; - - _dest = dc_dest; - _count = dc_count; - _pitch = dc_pitch; - _fracbits = dc_wall_fracbits; - for (int col = 0; col < 4; col++) - { - _colormap[col] = dc_wall_colormap[col]; - _source[col] = dc_wall_source[col]; - _iscale[col] = dc_wall_iscale[col]; - _texturefrac[col] = dc_wall_texturefrac[col]; - } - _srcblend = dc_srcblend; - _destblend = dc_destblend; - } - uint8_t PalWall1Command::AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material) { uint32_t lit_r = GPalette.BaseColors[fg].r; @@ -217,56 +198,6 @@ namespace swrenderer } } - void DrawWall4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - uint32_t place; - auto pal0 = _colormap[0]; - auto pal1 = _colormap[1]; - auto pal2 = _colormap[2]; - auto pal3 = _colormap[3]; - auto buf0 = _source[0]; - auto buf1 = _source[1]; - auto buf2 = _source[2]; - auto buf3 = _source[3]; - auto dc_wall_iscale0 = _iscale[0]; - auto dc_wall_iscale1 = _iscale[1]; - auto dc_wall_iscale2 = _iscale[2]; - auto dc_wall_iscale3 = _iscale[3]; - auto dc_wall_texturefrac0 = _texturefrac[0]; - auto dc_wall_texturefrac1 = _texturefrac[1]; - auto dc_wall_texturefrac2 = _texturefrac[2]; - auto dc_wall_texturefrac3 = _texturefrac[3]; - auto pitch = _pitch; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - dc_wall_texturefrac0 += dc_wall_iscale0 * skipped; - dc_wall_texturefrac1 += dc_wall_iscale1 * skipped; - dc_wall_texturefrac2 += dc_wall_iscale2 * skipped; - dc_wall_texturefrac3 += dc_wall_iscale3 * skipped; - dc_wall_iscale0 *= thread->num_cores; - dc_wall_iscale1 *= thread->num_cores; - dc_wall_iscale2 *= thread->num_cores; - dc_wall_iscale3 *= thread->num_cores; - pitch *= thread->num_cores; - - do - { - dest[0] = pal0[buf0[(place = dc_wall_texturefrac0) >> bits]]; dc_wall_texturefrac0 = place + dc_wall_iscale0; - dest[1] = pal1[buf1[(place = dc_wall_texturefrac1) >> bits]]; dc_wall_texturefrac1 = place + dc_wall_iscale1; - dest[2] = pal2[buf2[(place = dc_wall_texturefrac2) >> bits]]; dc_wall_texturefrac2 = place + dc_wall_iscale2; - dest[3] = pal3[buf3[(place = dc_wall_texturefrac3) >> bits]]; dc_wall_texturefrac3 = place + dc_wall_iscale3; - dest += pitch; - } while (--count); - } - void DrawWallMasked1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -326,58 +257,6 @@ namespace swrenderer } } - void DrawWallMasked4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - uint32_t place; - auto pal0 = _colormap[0]; - auto pal1 = _colormap[1]; - auto pal2 = _colormap[2]; - auto pal3 = _colormap[3]; - auto buf0 = _source[0]; - auto buf1 = _source[1]; - auto buf2 = _source[2]; - auto buf3 = _source[3]; - auto dc_wall_iscale0 = _iscale[0]; - auto dc_wall_iscale1 = _iscale[1]; - auto dc_wall_iscale2 = _iscale[2]; - auto dc_wall_iscale3 = _iscale[3]; - auto dc_wall_texturefrac0 = _texturefrac[0]; - auto dc_wall_texturefrac1 = _texturefrac[1]; - auto dc_wall_texturefrac2 = _texturefrac[2]; - auto dc_wall_texturefrac3 = _texturefrac[3]; - auto pitch = _pitch; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - dc_wall_texturefrac0 += dc_wall_iscale0 * skipped; - dc_wall_texturefrac1 += dc_wall_iscale1 * skipped; - dc_wall_texturefrac2 += dc_wall_iscale2 * skipped; - dc_wall_texturefrac3 += dc_wall_iscale3 * skipped; - dc_wall_iscale0 *= thread->num_cores; - dc_wall_iscale1 *= thread->num_cores; - dc_wall_iscale2 *= thread->num_cores; - dc_wall_iscale3 *= thread->num_cores; - pitch *= thread->num_cores; - - do - { - uint8_t pix; - - pix = buf0[(place = dc_wall_texturefrac0) >> bits]; if (pix) dest[0] = pal0[pix]; dc_wall_texturefrac0 = place + dc_wall_iscale0; - pix = buf1[(place = dc_wall_texturefrac1) >> bits]; if (pix) dest[1] = pal1[pix]; dc_wall_texturefrac1 = place + dc_wall_iscale1; - pix = buf2[(place = dc_wall_texturefrac2) >> bits]; if (pix) dest[2] = pal2[pix]; dc_wall_texturefrac2 = place + dc_wall_iscale2; - pix = buf3[(place = dc_wall_texturefrac3) >> bits]; if (pix) dest[3] = pal3[pix]; dc_wall_texturefrac3 = place + dc_wall_iscale3; - dest += pitch; - } while (--count); - } - void DrawWallAdd1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -440,72 +319,6 @@ namespace swrenderer } } - void DrawWallAdd4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t fg = fg2rgb[_colormap[i][pix]]; - uint32_t bg = bg2rgb[dest[i]]; - fg = (fg + bg) | 0x1f07c1f; - dest[i] = RGB32k.All[fg & (fg >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - } - void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -580,50 +393,6 @@ namespace swrenderer } } - void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = MIN(GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 255); - uint32_t g = MIN(GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 255); - uint32_t b = MIN(GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -697,76 +466,6 @@ namespace swrenderer } } - void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t a = (fg2rgb[_colormap[i][pix]] | 0x40100400) - bg2rgb[dest[i]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - int r = clamp(-GPalette.BaseColors[_colormap[i][pix]].r + GPalette.BaseColors[dest[i]].r, 0, 255); - int g = clamp(-GPalette.BaseColors[_colormap[i][pix]].g + GPalette.BaseColors[dest[i]].g, 0, 255); - int b = clamp(-GPalette.BaseColors[_colormap[i][pix]].b + GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += pitch; - } while (--count); - } - } - void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread) { uint32_t fracstep = _iscale; @@ -840,76 +539,6 @@ namespace swrenderer } } - void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int bits = _fracbits; - - uint32_t *fg2rgb = _srcblend; - uint32_t *bg2rgb = _destblend; - - uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] }; - uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] }; - - count = thread->count_for_thread(_dest_y, count); - if (count <= 0) - return; - - int pitch = _pitch; - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped; - dc_wall_iscale[i] *= thread->num_cores; - } - pitch *= thread->num_cores; - - if (!r_blendmethod) - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_colormap[i][pix]]; - uint32_t b = a; - - b &= 0x40100400; - b = b - (b >> 5); - a &= b; - a |= 0x01f07c1f; - dest[i] = RGB32k.All[a & (a >> 15)]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += _pitch; - } while (--count); - } - else - { - do - { - for (int i = 0; i < 4; ++i) - { - uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits]; - if (pix != 0) - { - uint32_t r = clamp(GPalette.BaseColors[_colormap[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255); - uint32_t g = clamp(GPalette.BaseColors[_colormap[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255); - uint32_t b = clamp(GPalette.BaseColors[_colormap[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255); - dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2]; - } - dc_wall_texturefrac[i] += dc_wall_iscale[i]; - } - dest += _pitch; - } while (--count); - } - } - ///////////////////////////////////////////////////////////////////////// PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom) @@ -994,146 +623,6 @@ namespace swrenderer } } - void DrawSingleSky4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int pitch = _pitch; - const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] }; - int textureheight0 = _sourceheight[0]; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] }; - int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; - uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int col = 0; col < 4; col++) - { - frac[col] += fracstep[col] * skipped; - fracstep[col] *= thread->num_cores; - } - pitch *= thread->num_cores; - int num_cores = thread->num_cores; - int index = skipped; - - // Top solid color: - while (index < start_fadetop_y) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - index += num_cores; - } - - // Top fade: - while (index < end_fadetop_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Textured center: - while (index < start_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - output[col] = source0[col][sample_index]; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Fade bottom: - while (index < end_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Bottom solid color: - while (index < count) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - index += num_cores; - } - } - void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread) { uint8_t *dest = _dest; @@ -1206,167 +695,6 @@ namespace swrenderer } } - void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread) - { - uint8_t *dest = _dest; - int count = _count; - int pitch = _pitch; - const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] }; - const uint8_t *source1[4] = { _source2[0], _source2[1], _source2[2], _source2[3] }; - int textureheight0 = _sourceheight[0]; - uint32_t maxtextureheight1 = _sourceheight[1] - 1; - const uint32_t *palette = (const uint32_t *)GPalette.BaseColors; - int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] }; - int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] }; - uint8_t output[4]; - - int start_fade = 2; // How fast it should fade out - - int solid_top_r = RPART(solid_top); - int solid_top_g = GPART(solid_top); - int solid_top_b = BPART(solid_top); - int solid_bottom_r = RPART(solid_bottom); - int solid_bottom_g = GPART(solid_bottom); - int solid_bottom_b = BPART(solid_bottom); - uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)]; - uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)]; - solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill; - solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill; - - // Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color: - int fade_length = (1 << (24 - start_fade)); - int start_fadetop_y = (-frac[0]) / fracstep[0]; - int end_fadetop_y = (fade_length - frac[0]) / fracstep[0]; - int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0]; - int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0]; - for (int col = 1; col < 4; col++) - { - start_fadetop_y = MIN(start_fadetop_y, (-frac[0]) / fracstep[0]); - end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[0]) / fracstep[0]); - start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[0]) / fracstep[0]); - end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[0]) / fracstep[0]); - } - start_fadetop_y = clamp(start_fadetop_y, 0, count); - end_fadetop_y = clamp(end_fadetop_y, 0, count); - start_fadebottom_y = clamp(start_fadebottom_y, 0, count); - end_fadebottom_y = clamp(end_fadebottom_y, 0, count); - - int skipped = thread->skipped_by_thread(_dest_y); - dest = thread->dest_for_thread(_dest_y, pitch, dest); - for (int col = 0; col < 4; col++) - { - frac[col] += fracstep[col] * skipped; - fracstep[col] *= thread->num_cores; - } - pitch *= thread->num_cores; - int num_cores = thread->num_cores; - int index = skipped; - - // Top solid color: - while (index < start_fadetop_y) - { - *((uint32_t*)dest) = solid_top_fill; - dest += pitch; - for (int col = 0; col < 4; col++) - frac[col] += fracstep[col]; - index += num_cores; - } - - // Top fade: - while (index < end_fadetop_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0); - int inv_alpha_top = 256 - alpha_top; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8; - c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8; - c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Textured center: - while (index < start_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - frac[col] += fracstep[col]; - } - - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Fade bottom: - while (index < end_fadebottom_y) - { - for (int col = 0; col < 4; col++) - { - uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS; - uint8_t fg = source0[col][sample_index]; - if (fg == 0) - { - uint32_t sample_index2 = MIN(sample_index, maxtextureheight1); - fg = source1[col][sample_index2]; - } - output[col] = fg; - - uint32_t c = palette[fg]; - int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0); - int inv_alpha_bottom = 256 - alpha_bottom; - int c_red = RPART(c); - int c_green = GPART(c); - int c_blue = BPART(c); - c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8; - c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8; - c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8; - output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)]; - - frac[col] += fracstep[col]; - } - *((uint32_t*)dest) = *((uint32_t*)output); - dest += pitch; - index += num_cores; - } - - // Bottom solid color: - while (index < count) - { - *((uint32_t*)dest) = solid_bottom_fill; - dest += pitch; - index += num_cores; - } - } - ///////////////////////////////////////////////////////////////////////// PalColumnCommand::PalColumnCommand() diff --git a/src/swrenderer/drawers/r_draw_pal.h b/src/swrenderer/drawers/r_draw_pal.h index 79152665c..539cf430d 100644 --- a/src/swrenderer/drawers/r_draw_pal.h +++ b/src/swrenderer/drawers/r_draw_pal.h @@ -32,37 +32,12 @@ namespace swrenderer float _step_viewpos_z; }; - class PalWall4Command : public DrawerCommand - { - public: - PalWall4Command(); - FString DebugInfo() override { return "PalWallCommand"; } - - protected: - uint8_t *_dest; - int _count; - int _pitch; - int _fracbits; - uint8_t *_colormap[4]; - const uint8_t *_source[4]; - uint32_t _iscale[4]; - uint32_t _texturefrac[4]; - uint32_t *_srcblend; - uint32_t *_destblend; - }; - class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; }; - class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; }; class PalSkyCommand : public DrawerCommand { @@ -85,9 +60,7 @@ namespace swrenderer }; class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; - class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; - class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; }; class PalColumnCommand : public DrawerCommand { diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index b3e970a6a..5cdaea108 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -181,72 +181,6 @@ namespace swrenderer ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData DrawWall4LLVMCommand::ThreadData(DrawerThread *thread) - { - WorkerThreadData d; - d.core = thread->core; - d.num_cores = thread->num_cores; - d.pass_start_y = thread->pass_start_y; - d.pass_end_y = thread->pass_end_y; - return d; - } - - DrawWall4LLVMCommand::DrawWall4LLVMCommand() - { - using namespace drawerargs; - - args.dest = (uint32_t*)dc_dest; - args.dest_y = _dest_y; - args.count = dc_count; - args.pitch = dc_pitch; - args.light_red = dc_shade_constants.light_red; - args.light_green = dc_shade_constants.light_green; - args.light_blue = dc_shade_constants.light_blue; - args.light_alpha = dc_shade_constants.light_alpha; - args.fade_red = dc_shade_constants.fade_red; - args.fade_green = dc_shade_constants.fade_green; - args.fade_blue = dc_shade_constants.fade_blue; - args.fade_alpha = dc_shade_constants.fade_alpha; - args.desaturate = dc_shade_constants.desaturate; - for (int i = 0; i < 4; i++) - { - args.texturefrac[i] = dc_wall_texturefrac[i]; - args.iscale[i] = dc_wall_iscale[i]; - args.texturefracx[i] = dc_wall_texturefracx[i]; - args.textureheight[i] = dc_wall_sourceheight[i]; - args.source[i] = (const uint32_t *)dc_wall_source[i]; - args.source2[i] = (const uint32_t *)dc_wall_source2[i]; - args.light[i] = LightBgra::calc_light_multiplier(dc_wall_light[i]); - } - args.srcalpha = dc_srcalpha >> (FRACBITS - 8); - args.destalpha = dc_destalpha >> (FRACBITS - 8); - args.flags = 0; - if (dc_shade_constants.simple_shade) - args.flags |= DrawWallArgs::simple_shade; - if (args.source2[0] == nullptr) - args.flags |= DrawWallArgs::nearest_filter; - - args.z = 0.0f; - args.step_z = 0.0f; - args.dynlights = nullptr; - args.num_dynlights = 0; - - DetectRangeError(args.dest, args.dest_y, args.count); - } - - void DrawWall4LLVMCommand::Execute(DrawerThread *thread) - { - WorkerThreadData d = ThreadData(thread); - Drawers::Instance()->vlinec4(&args, &d); - } - - FString DrawWall4LLVMCommand::DebugInfo() - { - return "DrawWall4\n" + args.ToString(); - } - - ///////////////////////////////////////////////////////////////////////////// - WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread) { WorkerThreadData d; diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 1364d537b..cd3bb7c1d 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -117,20 +117,6 @@ namespace swrenderer void Execute(DrawerThread *thread) override; }; - class DrawWall4LLVMCommand : public DrawerCommand - { - protected: - DrawWallArgs args; - - WorkerThreadData ThreadData(DrawerThread *thread); - - public: - DrawWall4LLVMCommand(); - - void Execute(DrawerThread *thread) override; - FString DebugInfo() override; - }; - class DrawWall1LLVMCommand : public DrawerCommand { protected: @@ -171,11 +157,6 @@ namespace swrenderer FString DebugInfo() override; }; - DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); - DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); @@ -197,9 +178,7 @@ namespace swrenderer DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand); - DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand); DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand); - DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand); class DrawFuzzColumnRGBACommand : public DrawerCommand { diff --git a/src/swrenderer/drawers/r_drawers.cpp b/src/swrenderer/drawers/r_drawers.cpp index e61e95183..57c33a5be 100644 --- a/src/swrenderer/drawers/r_drawers.cpp +++ b/src/swrenderer/drawers/r_drawers.cpp @@ -78,9 +78,7 @@ extern "C" void DrawSpanAddClamp_SSE2(const DrawSpanArgs *); void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *); void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void vlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *); - void mvlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); @@ -90,9 +88,7 @@ extern "C" void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *); void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void DrawSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *); - void DrawDoubleSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *); void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *); @@ -182,21 +178,13 @@ Drawers::Drawers() DrawSpanAddClamp = DrawSpanAddClamp_SSE2; DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2; vlinec1 = vlinec1_SSE2; - vlinec4 = vlinec4_SSE2; mvlinec1 = mvlinec1_SSE2; - mvlinec4 = mvlinec4_SSE2; tmvline1_add = tmvline1_add_SSE2; - tmvline4_add = tmvline4_add_SSE2; tmvline1_addclamp = tmvline1_addclamp_SSE2; - tmvline4_addclamp = tmvline4_addclamp_SSE2; tmvline1_subclamp = tmvline1_subclamp_SSE2; - tmvline4_subclamp = tmvline4_subclamp_SSE2; tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2; - tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2; DrawSky1 = DrawSky1_SSE2; - DrawSky4 = DrawSky4_SSE2; DrawDoubleSky1 = DrawDoubleSky1_SSE2; - DrawDoubleSky4 = DrawDoubleSky4_SSE2; TriDraw8.push_back(TriDraw8_0_SSE2); TriDraw8.push_back(TriDraw8_1_SSE2); TriDraw8.push_back(TriDraw8_2_SSE2); diff --git a/src/swrenderer/drawers/r_drawers.h b/src/swrenderer/drawers/r_drawers.h index 7a94c82dd..0a79f26d5 100644 --- a/src/swrenderer/drawers/r_drawers.h +++ b/src/swrenderer/drawers/r_drawers.h @@ -317,22 +317,14 @@ public: void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; - void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - void(*DrawSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; - void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr; std::vector TriDraw8; std::vector TriDraw32; diff --git a/src/swrenderer/scene/r_plane.cpp b/src/swrenderer/scene/r_plane.cpp index bcd0d5e5f..8f9661fbd 100644 --- a/src/swrenderer/scene/r_plane.cpp +++ b/src/swrenderer/scene/r_plane.cpp @@ -931,16 +931,10 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub uint32_t solid_top = frontskytex->GetSkyCapColor(false); uint32_t solid_bottom = frontskytex->GetSkyCapColor(true); - if (columns == 4) - if (!backskytex) - R_DrawSingleSkyCol4(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol4(solid_top, solid_bottom); + if (!backskytex) + R_DrawSingleSkyColumn(solid_top, solid_bottom); else - if (!backskytex) - R_DrawSingleSkyCol1(solid_top, solid_bottom); - else - R_DrawDoubleSkyCol1(solid_top, solid_bottom); + R_DrawDoubleSkyColumn(solid_top, solid_bottom); } static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns) @@ -970,76 +964,7 @@ static void R_DrawCapSky(visplane_t *pl) short *uwal = (short *)pl->top; short *dwal = (short *)pl->bottom; - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - R_DrawSkyColumn(x, y1, y2, 1); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - } - if (empty_column_in_set || middle_y2 <= middle_y1) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - R_DrawSkyColumn(x + i, y1[i], y2[i], 1); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (y1[i] < middle_y1) - R_DrawSkyColumn(x + i, y1[i], middle_y1, 1); - } - - // Draw the area where all 4 columns are active - R_DrawSkyColumn(x, middle_y1, middle_y2, 4); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (middle_y2 < y2[i]) - R_DrawSkyColumn(x + i, middle_y2, y2[i], 1); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++) + for (int x = x1; x < x2; x++) { int y1 = uwal[x]; int y2 = dwal[x]; diff --git a/src/swrenderer/scene/r_walldraw.cpp b/src/swrenderer/scene/r_walldraw.cpp index 41e27d40e..24d22b2c5 100644 --- a/src/swrenderer/scene/r_walldraw.cpp +++ b/src/swrenderer/scene/r_walldraw.cpp @@ -670,102 +670,11 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1 } } -// Draw four columns with support for non-power-of-two ranges -static void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)()) -{ - if (r_swtruecolor) - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - dc_wall_sourceheight[i] = sampler[i].height; - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) * 4 + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed - { - int count = y2 - y1; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - - uint64_t step64 = sampler[i].uv_step; - uint64_t pos64 = sampler[i].uv_pos; - sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count); - } - dc_dest = (ylookup[y1] + x) + dc_destorg; - dc_count = count; - draw4columns(); - } - else - { - dc_dest = (ylookup[y1] + x) + dc_destorg; - for (int i = 0; i < 4; i++) - { - dc_wall_source[i] = sampler[i].source; - dc_wall_source2[i] = sampler[i].source2; - dc_wall_texturefracx[i] = sampler[i].texturefracx; - } - - uint32_t left = y2 - y1; - while (left > 0) - { - // Find which column wraps first - uint32_t count = left; - for (int i = 0; i < 4; i++) - { - uint32_t available = sampler[i].uv_max - sampler[i].uv_pos; - uint32_t next_uv_wrap = available / sampler[i].uv_step; - if (available % sampler[i].uv_step != 0) - next_uv_wrap++; - count = MIN(next_uv_wrap, count); - } - - // Draw until that column wraps - for (int i = 0; i < 4; i++) - { - dc_wall_texturefrac[i] = sampler[i].uv_pos; - dc_wall_iscale[i] = sampler[i].uv_step; - } - dc_count = count; - draw4columns(); - - // Wrap the uv position - for (int i = 0; i < 4; i++) - { - sampler[i].uv_pos += sampler[i].uv_step * count; - if (sampler[i].uv_pos >= sampler[i].uv_max) - sampler[i].uv_pos -= sampler[i].uv_max; - } - - left -= count; - } - } - } -} - typedef void(*DrawColumnFuncPtr)(); static void ProcessWallWorker( int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, - const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr draw1column, DrawColumnFuncPtr draw4columns) + const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr drawcolumn) { if (rw_pic->UseType == FTexture::TEX_Null) return; @@ -805,33 +714,7 @@ static void ProcessWallWorker( double xmagnitude = 1.0; - if (r_dynlights) - { - for (int x = x1; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - NetUpdate(); - return; - } - - // Calculate where 4 column alignment begins and ends: - int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2); - int aligned_x2 = clamp(x2 / 4 * 4, x1, x2); - - // First unaligned columns: - for (int x = x1; x < aligned_x1; x++, light += rw_lightstep) + for (int x = x1; x < x2; x++, light += rw_lightstep) { int y1 = uwal[x]; int y2 = dwal[x]; @@ -844,119 +727,7 @@ static void ProcessWallWorker( if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); - } - - // The aligned columns - for (int x = aligned_x1; x < aligned_x2; x += 4) - { - // Find y1, y2, light and uv values for four columns: - int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] }; - int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] }; - - float lights[4]; - for (int i = 0; i < 4; i++) - { - lights[i] = light; - light += rw_lightstep; - } - - WallSampler sampler[4]; - for (int i = 0; i < 4; i++) - { - if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i])); - sampler[i] = WallSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol); - } - - // Figure out where we vertically can start and stop drawing 4 columns in one go - int middle_y1 = y1[0]; - int middle_y2 = y2[0]; - for (int i = 1; i < 4; i++) - { - middle_y1 = MAX(y1[i], middle_y1); - middle_y2 = MIN(y2[i], middle_y2); - } - - // If we got an empty column in our set we cannot draw 4 columns in one go: - bool empty_column_in_set = false; - int bilinear_count = 0; - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - empty_column_in_set = true; - if (sampler[i].source2) - bilinear_count++; - } - - if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4)) - { - for (int i = 0; i < 4; i++) - { - if (y2[i] <= y1[i]) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - Draw1Column(x + i, y1[i], y2[i], sampler[i], draw1column); - } - continue; - } - - // Draw the first rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (y1[i] < middle_y1) - Draw1Column(x + i, y1[i], middle_y1, sampler[i], draw1column); - } - - // Draw the area where all 4 columns are active - if (!fixed) - { - for (int i = 0; i < 4; i++) - { - if (r_swtruecolor) - { - dc_wall_colormap[i] = basecolormap->Maps; - dc_wall_light[i] = LIGHTSCALE(lights[i], wallshade); - } - else - { - dc_wall_colormap[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT); - dc_wall_light[i] = 0; - } - } - } - Draw4Columns(x, middle_y1, middle_y2, sampler, draw4columns); - - // Draw the last rows where not all 4 columns are active - for (int i = 0; i < 4; i++) - { - if (!fixed) - R_SetColorMapLight(basecolormap, lights[i], wallshade); - - if (middle_y2 < y2[i]) - Draw1Column(x + i, middle_y2, y2[i], sampler[i], draw1column); - } - } - - // The last unaligned columns: - for (int x = aligned_x2; x < x2; x++, light += rw_lightstep) - { - int y1 = uwal[x]; - int y2 = dwal[x]; - if (y2 <= y1) - continue; - - if (!fixed) - R_SetColorMapLight(basecolormap, light, wallshade); - - if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x])); - - WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol); - Draw1Column(x, y1, y2, sampler, draw1column); + Draw1Column(x, y1, y2, sampler, drawcolumn); } NetUpdate(); @@ -964,7 +735,7 @@ static void ProcessWallWorker( static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallCol1, R_DrawWallCol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallColumn); } static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) @@ -975,22 +746,21 @@ static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *s } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedCol1, R_DrawWallMaskedCol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedColumn); } } static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn) { - static void(*drawcol1)(); - static void(*drawcol4)(); - if (!R_GetTransMaskDrawers(&drawcol1, &drawcol4)) + void(*drawcol1)(); + if (!R_GetTransMaskDrawers(&drawcol1)) { // The current translucency is unsupported, so draw with regular ProcessMaskedWall instead. ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol); } else { - ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1, drawcol4); + ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1); } } diff --git a/tools/drawergen/fixedfunction/drawskycodegen.cpp b/tools/drawergen/fixedfunction/drawskycodegen.cpp index 3a0581870..3bd23e529 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.cpp +++ b/tools/drawergen/fixedfunction/drawskycodegen.cpp @@ -32,28 +32,16 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) +void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); - source0[0] = args[0][1].load(true); - source0[1] = args[0][2].load(true); - source0[2] = args[0][3].load(true); - source0[3] = args[0][4].load(true); - source1[0] = args[0][5].load(true); - source1[1] = args[0][6].load(true); - source1[2] = args[0][7].load(true); - source1[3] = args[0][8].load(true); + source0 = args[0][1].load(true); + source1 = args[0][5].load(true); pitch = args[0][9].load(true); count = args[0][10].load(true); dest_y = args[0][11].load(true); - texturefrac[0] = args[0][12].load(true); - texturefrac[1] = args[0][13].load(true); - texturefrac[2] = args[0][14].load(true); - texturefrac[3] = args[0][15].load(true); - iscale[0] = args[0][16].load(true); - iscale[1] = args[0][17].load(true); - iscale[2] = args[0][18].load(true); - iscale[3] = args[0][19].load(true); + texturefrac = args[0][12].load(true); + iscale = args[0][16].load(true); textureheight0 = args[0][20].load(true); SSAInt textureheight1 = args[0][21].load(true); maxtextureheight1 = textureheight1 - 1; @@ -70,66 +58,45 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue pitch = pitch * thread.num_cores; - int numColumns = fourColumns ? 4 : 1; - for (int i = 0; i < numColumns; i++) - { - stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); - fracstep[i] = iscale[i] * thread.num_cores; - } + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + fracstep = iscale * thread.num_cores; - Loop(variant, fourColumns); + Loop(variant); } -void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fourColumns) +void DrawSkyCodegen::Loop(DrawSkyVariant variant) { - int numColumns = fourColumns ? 4 : 1; - stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAInt frac[4]; - for (int i = 0; i < numColumns; i++) - frac[i] = stack_frac[i].load(); + SSAInt frac = stack_frac.load(); SSAInt offset = index * pitch * 4; - if (fourColumns) - { - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - colors[i] = FadeOut(frac[i], Sample(frac[i], i, variant)); - - SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - dest[offset].store_unaligned_vec16ub(color); - } - else - { - SSAVec4i color = FadeOut(frac[0], Sample(frac[0], 0, variant)); - dest[offset].store_vec4ub(color); - } + SSAVec4i color = FadeOut(frac, Sample(frac, variant)); + dest[offset].store_vec4ub(color); stack_index.store(index.add(SSAInt(1), true, true)); - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(frac[i] + fracstep[i]); + stack_frac.store(frac + fracstep); loop.end_block(); } } -SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, int index, DrawSkyVariant variant) +SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, DrawSkyVariant variant) { SSAInt sample_index = (((frac << 8) >> FRACBITS) * textureheight0) >> FRACBITS; if (variant == DrawSkyVariant::Single) { - return source0[index][sample_index * 4].load_vec4ub(false); + return source0[sample_index * 4].load_vec4ub(false); } else { SSAInt sample_index2 = SSAInt::MIN(sample_index, maxtextureheight1); - SSAVec4i color0 = source0[index][sample_index * 4].load_vec4ub(false); - SSAVec4i color1 = source1[index][sample_index2 * 4].load_vec4ub(false); + SSAVec4i color0 = source0[sample_index * 4].load_vec4ub(false); + SSAVec4i color1 = source1[sample_index2 * 4].load_vec4ub(false); return blend_alpha_blend(color0, color1); } } diff --git a/tools/drawergen/fixedfunction/drawskycodegen.h b/tools/drawergen/fixedfunction/drawskycodegen.h index aaf4bdfeb..6cd700203 100644 --- a/tools/drawergen/fixedfunction/drawskycodegen.h +++ b/tools/drawergen/fixedfunction/drawskycodegen.h @@ -33,28 +33,28 @@ enum class DrawSkyVariant class DrawSkyCodegen : public DrawerCodegen { public: - void Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data); private: - void Loop(DrawSkyVariant variant, bool fourColumns); - SSAVec4i Sample(SSAInt frac, int index, DrawSkyVariant variant); + void Loop(DrawSkyVariant variant); + SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant); SSAVec4i FadeOut(SSAInt frac, SSAVec4i color); - SSAStack stack_index, stack_frac[4]; + SSAStack stack_index, stack_frac; SSAUBytePtr dest; - SSAUBytePtr source0[4]; - SSAUBytePtr source1[4]; + SSAUBytePtr source0; + SSAUBytePtr source1; SSAInt pitch; SSAInt count; SSAInt dest_y; - SSAInt texturefrac[4]; - SSAInt iscale[4]; + SSAInt texturefrac; + SSAInt iscale; SSAInt textureheight0; SSAInt maxtextureheight1; SSAVec4i top_color; SSAVec4i bottom_color; SSAWorkerThread thread; - SSAInt fracstep[4]; + SSAInt fracstep; }; diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.cpp b/tools/drawergen/fixedfunction/drawwallcodegen.cpp index 898aebdbb..055b132d1 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.cpp +++ b/tools/drawergen/fixedfunction/drawwallcodegen.cpp @@ -32,40 +32,19 @@ #include "ssa/ssa_struct_type.h" #include "ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) +void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(true); - source[0] = args[0][1].load(true); - source[1] = args[0][2].load(true); - source[2] = args[0][3].load(true); - source[3] = args[0][4].load(true); - source2[0] = args[0][5].load(true); - source2[1] = args[0][6].load(true); - source2[2] = args[0][7].load(true); - source2[3] = args[0][8].load(true); + source = args[0][1].load(true); + source2 = args[0][5].load(true); pitch = args[0][9].load(true); count = args[0][10].load(true); dest_y = args[0][11].load(true); - texturefrac[0] = args[0][12].load(true); - texturefrac[1] = args[0][13].load(true); - texturefrac[2] = args[0][14].load(true); - texturefrac[3] = args[0][15].load(true); - texturefracx[0] = args[0][16].load(true); - texturefracx[1] = args[0][17].load(true); - texturefracx[2] = args[0][18].load(true); - texturefracx[3] = args[0][19].load(true); - iscale[0] = args[0][20].load(true); - iscale[1] = args[0][21].load(true); - iscale[2] = args[0][22].load(true); - iscale[3] = args[0][23].load(true); - textureheight[0] = args[0][24].load(true); - textureheight[1] = args[0][25].load(true); - textureheight[2] = args[0][26].load(true); - textureheight[3] = args[0][27].load(true); - light[0] = args[0][28].load(true); - light[1] = args[0][29].load(true); - light[2] = args[0][30].load(true); - light[3] = args[0][31].load(true); + texturefrac = args[0][12].load(true); + texturefracx = args[0][16].load(true); + iscale = args[0][20].load(true); + textureheight = args[0][24].load(true); + light = args[0][28].load(true); srcalpha = args[0][32].load(true); destalpha = args[0][33].load(true); SSAShort light_alpha = args[0][34].load(true); @@ -99,42 +78,34 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal pitch = pitch * thread.num_cores; - int numColumns = fourColumns ? 4 : 1; - for (int i = 0; i < numColumns; i++) - { - stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); - fracstep[i] = iscale[i] * thread.num_cores; - one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; - } + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + fracstep = iscale * thread.num_cores; + one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1; start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread)); step_z = step_z * SSAFloat(thread.num_cores); SSAIfBlock branch; branch.if_block(is_simple_shade); - LoopShade(variant, fourColumns, true); + LoopShade(variant, true); branch.else_block(); - LoopShade(variant, fourColumns, false); + LoopShade(variant, false); branch.end_block(); } -void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade) +void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool isSimpleShade) { SSAIfBlock branch; branch.if_block(is_nearest_filter); - Loop(variant, fourColumns, isSimpleShade, true); + Loop(variant, isSimpleShade, true); branch.else_block(); - int numColumns = fourColumns ? 4 : 1; - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(stack_frac[i].load() - (one[i] / 2)); - Loop(variant, fourColumns, isSimpleShade, false); + stack_frac.store(stack_frac.load() - (one / 2)); + Loop(variant, isSimpleShade, false); branch.end_block(); } -void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter) +void DrawWallCodegen::Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter) { - int numColumns = fourColumns ? 4 : 1; - stack_index.store(SSAInt(0)); stack_z.store(start_z); { @@ -143,57 +114,30 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim z = stack_z.load(); loop.loop_block(index < count); - SSAInt frac[4]; - for (int i = 0; i < numColumns; i++) - frac[i] = stack_frac[i].load(); - + SSAInt frac = stack_frac.load(); SSAInt offset = index * pitch * 4; - if (fourColumns) - { - SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false); - SSAVec8s bg0 = SSAVec8s::extendlo(bg); - SSAVec8s bg1 = SSAVec8s::extendhi(bg); - SSAVec4i bgcolors[4] = - { - SSAVec4i::extendlo(bg0), - SSAVec4i::extendhi(bg0), - SSAVec4i::extendlo(bg1), - SSAVec4i::extendhi(bg1) - }; - - SSAVec4i colors[4]; - for (int i = 0; i < 4; i++) - colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); - - SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); - dest[offset].store_unaligned_vec16ub(color); - } - else - { - SSAVec4i bgcolor = dest[offset].load_vec4ub(false); - SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); - dest[offset].store_vec4ub(color); - } + SSAVec4i bgcolor = dest[offset].load_vec4ub(false); + SSAVec4i color = Blend(Shade(Sample(frac, isNearestFilter), isSimpleShade), bgcolor, variant); + dest[offset].store_vec4ub(color); stack_z.store(z + step_z); stack_index.store(index.add(SSAInt(1), true, true)); - for (int i = 0; i < numColumns; i++) - stack_frac[i].store(frac[i] + fracstep[i]); + stack_frac.store(frac + fracstep); loop.end_block(); } } -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) { if (isNearestFilter) { - SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; - return source[index][sample_index * 4].load_vec4ub(false); + SSAInt sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS; + return source[sample_index * 4].load_vec4ub(false); } else { - return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + return SampleLinear(source, source2, texturefracx, frac, one, textureheight); } } @@ -217,13 +161,13 @@ SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAIn return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; } -SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) +SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade) { SSAVec4i c; if (isSimpleShade) - c = shade_bgra_simple(fg, light[index]); + c = shade_bgra_simple(fg, light); else - c = shade_bgra_advanced(fg, light[index], shade_constants); + c = shade_bgra_advanced(fg, light, shade_constants); stack_lit_color.store(c); stack_light_index.store(SSAInt(0)); diff --git a/tools/drawergen/fixedfunction/drawwallcodegen.h b/tools/drawergen/fixedfunction/drawwallcodegen.h index cb46dcd5f..1afb5396a 100644 --- a/tools/drawergen/fixedfunction/drawwallcodegen.h +++ b/tools/drawergen/fixedfunction/drawwallcodegen.h @@ -37,31 +37,31 @@ enum class DrawWallVariant class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); + void Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data); private: - void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); - void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); + void LoopShade(DrawWallVariant variant, bool isSimpleShade); + void Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, bool isNearestFilter); SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); - SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); + SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); - SSAStack stack_index, stack_frac[4], stack_light_index; + SSAStack stack_index, stack_frac, stack_light_index; SSAStack stack_lit_color; SSAStack stack_z; SSAUBytePtr dest; - SSAUBytePtr source[4]; - SSAUBytePtr source2[4]; + SSAUBytePtr source; + SSAUBytePtr source2; SSAInt pitch; SSAInt count; SSAInt dest_y; - SSAInt texturefrac[4]; - SSAInt texturefracx[4]; - SSAInt iscale[4]; - SSAInt textureheight[4]; - SSAInt light[4]; + SSAInt texturefrac; + SSAInt texturefracx; + SSAInt iscale; + SSAInt textureheight; + SSAInt light; SSAInt srcalpha; SSAInt destalpha; SSABool is_simple_shade; @@ -69,8 +69,8 @@ private: SSAShadeConstants shade_constants; SSAWorkerThread thread; - SSAInt fracstep[4]; - SSAInt one[4]; + SSAInt fracstep; + SSAInt one; SSAFloat start_z, step_z; diff --git a/tools/drawergen/llvmdrawers.cpp b/tools/drawergen/llvmdrawers.cpp index 099919997..0ce8ee830 100644 --- a/tools/drawergen/llvmdrawers.cpp +++ b/tools/drawergen/llvmdrawers.cpp @@ -51,22 +51,14 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName, CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent); CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp); CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp); - CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1); - CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4); - CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1); - CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4); - CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1); - CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4); - CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1); - CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4); - CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1); - CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4); - CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1); - CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4); - CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1); - CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4); - CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1); - CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4); + CodegenDrawWall("vlinec1", DrawWallVariant::Opaque); + CodegenDrawWall("mvlinec1", DrawWallVariant::Masked); + CodegenDrawWall("tmvline1_add", DrawWallVariant::Add); + CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp); + CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp); + CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp); + CodegenDrawSky("DrawSky1", DrawSkyVariant::Single); + CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double); for (int i = 0; i < NumTriBlendModes(); i++) { CodegenDrawTriangle("TriDraw8_" + std::to_string(i), (TriBlendMode)i, false, false); @@ -115,7 +107,7 @@ void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant) throw Exception("verifyFunction failed for CodegenDrawSpan()"); } -void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns) +void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -126,7 +118,7 @@ void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int function.create_public(); DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -134,7 +126,7 @@ void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int throw Exception("verifyFunction failed for CodegenDrawWall()"); } -void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns) +void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -145,7 +137,7 @@ void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int c function.create_public(); DrawSkyCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); diff --git a/tools/drawergen/llvmdrawers.h b/tools/drawergen/llvmdrawers.h index a7f1c8625..dd66c2a86 100644 --- a/tools/drawergen/llvmdrawers.h +++ b/tools/drawergen/llvmdrawers.h @@ -49,8 +49,8 @@ public: private: void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); - void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); - void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns); + void CodegenDrawWall(const char *name, DrawWallVariant variant); + void CodegenDrawSky(const char *name, DrawSkyVariant variant); void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill); llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context);