Remove 4 column wall drawers

This commit is contained in:
Magnus Norddahl 2016-12-27 07:18:04 +01:00
parent 2659090e1c
commit 49903af394
16 changed files with 119 additions and 1403 deletions

View file

@ -496,30 +496,26 @@ namespace swrenderer
return tex->GetColumn(col, nullptr);
}
bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)())
bool R_GetTransMaskDrawers(void(**drawColumn)())
{
if (colfunc == R_DrawAddColumn)
{
*drawCol1 = R_DrawWallAddCol1;
*drawCol4 = R_DrawWallAddCol4;
*drawColumn = R_DrawWallAddColumn;
return true;
}
if (colfunc == R_DrawAddClampColumn)
{
*drawCol1 = R_DrawWallAddClampCol1;
*drawCol4 = R_DrawWallAddClampCol4;
*drawColumn = R_DrawWallAddClampColumn;
return true;
}
if (colfunc == R_DrawSubClampColumn)
{
*drawCol1 = R_DrawWallSubClampCol1;
*drawCol4 = R_DrawWallSubClampCol4;
*drawColumn = R_DrawWallSubClampColumn;
return true;
}
if (colfunc == R_DrawRevSubClampColumn)
{
*drawCol1 = R_DrawWallRevSubClampCol1;
*drawCol4 = R_DrawWallRevSubClampCol4;
*drawColumn = R_DrawWallRevSubClampColumn;
return true;
}
return false;
@ -637,7 +633,7 @@ namespace swrenderer
/////////////////////////////////////////////////////////////////////////
void R_DrawWallCol1()
void R_DrawWallColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWall1LLVMCommand>();
@ -645,15 +641,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWall1PalCommand>();
}
// Queues the 4-column wall drawer: DrawWall4LLVMCommand when r_swtruecolor
// is set, DrawWall4PalCommand otherwise.
void R_DrawWallCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWall4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWall4PalCommand>();
}
void R_DrawWallMaskedCol1()
void R_DrawWallMaskedColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWallMasked1LLVMCommand>();
@ -661,15 +649,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWallMasked1PalCommand>();
}
// Queues the 4-column masked wall drawer: DrawWallMasked4LLVMCommand when
// r_swtruecolor is set, DrawWallMasked4PalCommand otherwise.
void R_DrawWallMaskedCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWallMasked4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWallMasked4PalCommand>();
}
void R_DrawWallAddCol1()
void R_DrawWallAddColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWallAdd1LLVMCommand>();
@ -679,15 +659,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWallAddClamp1PalCommand>();
}
// Queues the 4-column additive wall drawer: DrawWallAdd4LLVMCommand when
// r_swtruecolor is set, DrawWallAdd4PalCommand otherwise.
void R_DrawWallAddCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWallAdd4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWallAdd4PalCommand>();
}
void R_DrawWallAddClampCol1()
void R_DrawWallAddClampColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWallAddClamp1LLVMCommand>();
@ -695,15 +667,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWallAddClamp1PalCommand>();
}
// Queues the 4-column add-clamp wall drawer: DrawWallAddClamp4LLVMCommand
// when r_swtruecolor is set, DrawWallAddClamp4PalCommand otherwise.
void R_DrawWallAddClampCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWallAddClamp4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWallAddClamp4PalCommand>();
}
void R_DrawWallSubClampCol1()
void R_DrawWallSubClampColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWallSubClamp1LLVMCommand>();
@ -711,15 +675,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWallSubClamp1PalCommand>();
}
// Queues the 4-column sub-clamp wall drawer: DrawWallSubClamp4LLVMCommand
// when r_swtruecolor is set, DrawWallSubClamp4PalCommand otherwise.
void R_DrawWallSubClampCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWallSubClamp4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWallSubClamp4PalCommand>();
}
void R_DrawWallRevSubClampCol1()
void R_DrawWallRevSubClampColumn()
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp1LLVMCommand>();
@ -727,15 +683,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp1PalCommand>();
}
// Queues the 4-column reverse-sub-clamp wall drawer:
// DrawWallRevSubClamp4LLVMCommand when r_swtruecolor is set,
// DrawWallRevSubClamp4PalCommand otherwise.
void R_DrawWallRevSubClampCol4()
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp4LLVMCommand>();
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawWallRevSubClamp4PalCommand>();
}
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom)
void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom)
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawSingleSky1LLVMCommand>(solid_top, solid_bottom);
@ -743,15 +691,7 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawSingleSky1PalCommand>(solid_top, solid_bottom);
}
// Queues the 4-column single-layer sky drawer, forwarding the solid top and
// bottom colors: DrawSingleSky4LLVMCommand when r_swtruecolor is set,
// DrawSingleSky4PalCommand otherwise.
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom)
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawSingleSky4LLVMCommand>(solid_top, solid_bottom);
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawSingleSky4PalCommand>(solid_top, solid_bottom);
}
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom)
void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom)
{
if (r_swtruecolor)
DrawerCommandQueue::QueueCommand<DrawDoubleSky1LLVMCommand>(solid_top, solid_bottom);
@ -759,14 +699,6 @@ namespace swrenderer
DrawerCommandQueue::QueueCommand<DrawDoubleSky1PalCommand>(solid_top, solid_bottom);
}
// Queues the 4-column double-layer sky drawer, forwarding the solid top and
// bottom colors: DrawDoubleSky4LLVMCommand when r_swtruecolor is set,
// DrawDoubleSky4PalCommand otherwise.
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom)
{
	if (r_swtruecolor)
	{
		DrawerCommandQueue::QueueCommand<DrawDoubleSky4LLVMCommand>(solid_top, solid_bottom);
		return;
	}

	DrawerCommandQueue::QueueCommand<DrawDoubleSky4PalCommand>(solid_top, solid_bottom);
}
void R_DrawColumn()
{
if (r_swtruecolor)

View file

@ -130,7 +130,7 @@ namespace swrenderer
bool R_SetPatchStyle(FRenderStyle style, fixed_t alpha, int translation, uint32_t color);
bool R_SetPatchStyle(FRenderStyle style, float alpha, int translation, uint32_t color);
void R_FinishSetPatchStyle(); // Call this after finished drawing the current thing, in case its style was STYLE_Shade
bool R_GetTransMaskDrawers(void(**drawCol1)(), void(**drawCol4)());
bool R_GetTransMaskDrawers(void(**drawColumn)());
const uint8_t *R_GetColumn(FTexture *tex, int col);
@ -165,23 +165,15 @@ namespace swrenderer
void R_DrawFogBoundary(int x1, int x2, short *uclip, short *dclip);
void R_FillSpan();
void R_DrawWallCol1();
void R_DrawWallCol4();
void R_DrawWallMaskedCol1();
void R_DrawWallMaskedCol4();
void R_DrawWallAddCol1();
void R_DrawWallAddCol4();
void R_DrawWallAddClampCol1();
void R_DrawWallAddClampCol4();
void R_DrawWallSubClampCol1();
void R_DrawWallSubClampCol4();
void R_DrawWallRevSubClampCol1();
void R_DrawWallRevSubClampCol4();
void R_DrawWallColumn();
void R_DrawWallMaskedColumn();
void R_DrawWallAddColumn();
void R_DrawWallAddClampColumn();
void R_DrawWallSubClampColumn();
void R_DrawWallRevSubClampColumn();
void R_DrawSingleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawSingleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol1(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyCol4(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawSingleSkyColumn(uint32_t solid_top, uint32_t solid_bottom);
void R_DrawDoubleSkyColumn(uint32_t solid_top, uint32_t solid_bottom);
// Sets dc_colormap and dc_light to their appropriate values depending on the output format (pal vs true color)
void R_SetColorMapLight(FSWColormap *base_colormap, float light, int shade);

View file

@ -110,25 +110,6 @@ namespace swrenderer
_step_viewpos_z = dc_viewpos_step.Z;
}
// Snapshots the current drawerargs state into this command: destination
// pointer, row count, pitch, fraction bits, blend tables and the per-column
// colormap/source/step/start values for four columns.
PalWall4Command::PalWall4Command()
{
	using namespace drawerargs;

	// Per-column inputs.
	for (int i = 0; i != 4; ++i)
	{
		_source[i] = dc_wall_source[i];
		_colormap[i] = dc_wall_colormap[i];
		_texturefrac[i] = dc_wall_texturefrac[i];
		_iscale[i] = dc_wall_iscale[i];
	}

	// Values shared by all four columns.
	_dest = dc_dest;
	_pitch = dc_pitch;
	_count = dc_count;
	_fracbits = dc_wall_fracbits;
	_srcblend = dc_srcblend;
	_destblend = dc_destblend;
}
uint8_t PalWall1Command::AddLights(const TriLight *lights, int num_lights, float viewpos_z, uint8_t fg, uint8_t material)
{
uint32_t lit_r = GPalette.BaseColors[fg].r;
@ -217,56 +198,6 @@ namespace swrenderer
}
}
// Draws four opaque paletted wall columns side by side. For every row, each
// column samples its own source at (texturefrac >> fracbits), maps the texel
// through its own colormap and stores the result at dest[column].
void DrawWall4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_dest_y, _count);
	if (rows <= 0)
		return;

	const int shift = _fracbits;
	const int skipped = thread->skipped_by_thread(_dest_y);
	const int cores = thread->num_cores;
	uint8_t *out = thread->dest_for_thread(_dest_y, _pitch, _dest);
	const int stride = _pitch * cores;

	// Start each column past the rows reported by skipped_by_thread and step
	// by num_cores rows at a time.
	uint32_t frac[4];
	uint32_t step[4];
	for (int col = 0; col < 4; col++)
	{
		frac[col] = _texturefrac[col] + _iscale[col] * skipped;
		step[col] = _iscale[col] * cores;
	}

	do
	{
		for (int col = 0; col < 4; col++)
		{
			out[col] = _colormap[col][_source[col][frac[col] >> shift]];
			frac[col] += step[col];
		}
		out += stride;
	} while (--rows);
}
void DrawWallMasked1PalCommand::Execute(DrawerThread *thread)
{
uint32_t fracstep = _iscale;
@ -326,58 +257,6 @@ namespace swrenderer
}
}
// Draws four masked paletted wall columns side by side. Texels with value 0
// are skipped (the destination pixel is left untouched); all other texels are
// mapped through the column's colormap and stored at dest[column].
void DrawWallMasked4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_dest_y, _count);
	if (rows <= 0)
		return;

	const int shift = _fracbits;
	const int skipped = thread->skipped_by_thread(_dest_y);
	const int cores = thread->num_cores;
	uint8_t *out = thread->dest_for_thread(_dest_y, _pitch, _dest);
	const int stride = _pitch * cores;

	// Start each column past the rows reported by skipped_by_thread and step
	// by num_cores rows at a time.
	uint32_t frac[4];
	uint32_t step[4];
	for (int col = 0; col < 4; col++)
	{
		frac[col] = _texturefrac[col] + _iscale[col] * skipped;
		step[col] = _iscale[col] * cores;
	}

	do
	{
		for (int col = 0; col < 4; col++)
		{
			const uint8_t texel = _source[col][frac[col] >> shift];
			if (texel != 0)
				out[col] = _colormap[col][texel];
			frac[col] += step[col];
		}
		out += stride;
	} while (--rows);
}
void DrawWallAdd1PalCommand::Execute(DrawerThread *thread)
{
uint32_t fracstep = _iscale;
@ -440,72 +319,6 @@ namespace swrenderer
}
}
// Draws four masked wall columns with additive blending. Texel 0 is treated
// as transparent. When r_blendmethod is zero the blend goes through the
// _srcblend/_destblend tables and RGB32k; otherwise the palette RGB values
// are summed, limited to 255 and looked up in RGB256k.
void DrawWallAdd4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_dest_y, _count);
	if (rows <= 0)
		return;

	const int shift = _fracbits;
	const uint32_t *srctab = _srcblend;
	const uint32_t *dsttab = _destblend;
	const int skipped = thread->skipped_by_thread(_dest_y);
	const int cores = thread->num_cores;
	uint8_t *out = thread->dest_for_thread(_dest_y, _pitch, _dest);
	const int stride = _pitch * cores;

	// Start each column past the rows reported by skipped_by_thread and step
	// by num_cores rows at a time.
	uint32_t frac[4];
	uint32_t step[4];
	for (int col = 0; col < 4; col++)
	{
		frac[col] = _texturefrac[col] + _iscale[col] * skipped;
		step[col] = _iscale[col] * cores;
	}

	if (!r_blendmethod)
	{
		do
		{
			for (int col = 0; col < 4; ++col)
			{
				const uint8_t texel = _source[col][frac[col] >> shift];
				if (texel != 0)
				{
					uint32_t sum = srctab[_colormap[col][texel]] + dsttab[out[col]];
					sum |= 0x1f07c1f;
					out[col] = RGB32k.All[sum & (sum >> 15)];
				}
				frac[col] += step[col];
			}
			out += stride;
		} while (--rows);
	}
	else
	{
		do
		{
			for (int col = 0; col < 4; ++col)
			{
				const uint8_t texel = _source[col][frac[col] >> shift];
				if (texel != 0)
				{
					const auto &src = GPalette.BaseColors[_colormap[col][texel]];
					const auto &dst = GPalette.BaseColors[out[col]];
					uint32_t r = MIN(src.r + dst.r, 255);
					uint32_t g = MIN(src.g + dst.g, 255);
					uint32_t b = MIN(src.b + dst.b, 255);
					out[col] = RGB256k.RGB[r>>2][g>>2][b>>2];
				}
				frac[col] += step[col];
			}
			out += stride;
		} while (--rows);
	}
}
void DrawWallAddClamp1PalCommand::Execute(DrawerThread *thread)
{
uint32_t fracstep = _iscale;
@ -580,50 +393,6 @@ namespace swrenderer
}
}
// Draws four masked wall columns with clamped additive blending. Texel 0 is
// treated as transparent; otherwise the palette RGB values of the colormapped
// texel and of the destination pixel are summed, limited to 255 and looked up
// in RGB256k.
void DrawWallAddClamp4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_dest_y, _count);
	if (rows <= 0)
		return;

	const int shift = _fracbits;
	const int skipped = thread->skipped_by_thread(_dest_y);
	const int cores = thread->num_cores;
	uint8_t *out = thread->dest_for_thread(_dest_y, _pitch, _dest);
	const int stride = _pitch * cores;

	// Start each column past the rows reported by skipped_by_thread and step
	// by num_cores rows at a time.
	uint32_t frac[4];
	uint32_t step[4];
	for (int col = 0; col < 4; col++)
	{
		frac[col] = _texturefrac[col] + _iscale[col] * skipped;
		step[col] = _iscale[col] * cores;
	}

	do
	{
		for (int col = 0; col < 4; ++col)
		{
			const uint8_t texel = _source[col][frac[col] >> shift];
			if (texel != 0)
			{
				const auto &src = GPalette.BaseColors[_colormap[col][texel]];
				const auto &dst = GPalette.BaseColors[out[col]];
				uint32_t r = MIN(src.r + dst.r, 255);
				uint32_t g = MIN(src.g + dst.g, 255);
				uint32_t b = MIN(src.b + dst.b, 255);
				out[col] = RGB256k.RGB[r>>2][g>>2][b>>2];
			}
			frac[col] += step[col];
		}
		out += stride;
	} while (--rows);
}
void DrawWallSubClamp1PalCommand::Execute(DrawerThread *thread)
{
uint32_t fracstep = _iscale;
@ -697,76 +466,6 @@ namespace swrenderer
}
}
// Draws four masked wall columns with clamped subtractive blending. Texel 0
// is treated as transparent. When r_blendmethod is zero the blend uses the
// _srcblend/_destblend tables with a packed-channel borrow mask and RGB32k;
// otherwise it computes (destination - source) per palette channel, clamps to
// 0..255 and looks the result up in RGB256k.
void DrawWallSubClamp4PalCommand::Execute(DrawerThread *thread)
{
	int rows = thread->count_for_thread(_dest_y, _count);
	if (rows <= 0)
		return;

	const int shift = _fracbits;
	const uint32_t *srctab = _srcblend;
	const uint32_t *dsttab = _destblend;
	const int skipped = thread->skipped_by_thread(_dest_y);
	const int cores = thread->num_cores;
	uint8_t *out = thread->dest_for_thread(_dest_y, _pitch, _dest);
	const int stride = _pitch * cores;

	// Start each column past the rows reported by skipped_by_thread and step
	// by num_cores rows at a time.
	uint32_t frac[4];
	uint32_t step[4];
	for (int col = 0; col < 4; col++)
	{
		frac[col] = _texturefrac[col] + _iscale[col] * skipped;
		step[col] = _iscale[col] * cores;
	}

	if (!r_blendmethod)
	{
		do
		{
			for (int col = 0; col < 4; ++col)
			{
				const uint8_t texel = _source[col][frac[col] >> shift];
				if (texel != 0)
				{
					uint32_t diff = (srctab[_colormap[col][texel]] | 0x40100400) - dsttab[out[col]];
					// Guard bits that survived the subtraction select which
					// channels are kept; the others are zeroed.
					uint32_t keep = diff & 0x40100400;
					keep -= keep >> 5;
					diff &= keep;
					diff |= 0x01f07c1f;
					out[col] = RGB32k.All[diff & (diff >> 15)];
				}
				frac[col] += step[col];
			}
			out += stride;
		} while (--rows);
	}
	else
	{
		do
		{
			for (int col = 0; col < 4; ++col)
			{
				const uint8_t texel = _source[col][frac[col] >> shift];
				if (texel != 0)
				{
					const auto &src = GPalette.BaseColors[_colormap[col][texel]];
					const auto &dst = GPalette.BaseColors[out[col]];
					int r = clamp(-src.r + dst.r, 0, 255);
					int g = clamp(-src.g + dst.g, 0, 255);
					int b = clamp(-src.b + dst.b, 0, 255);
					out[col] = RGB256k.RGB[r>>2][g>>2][b>>2];
				}
				frac[col] += step[col];
			}
			out += stride;
		} while (--rows);
	}
}
void DrawWallRevSubClamp1PalCommand::Execute(DrawerThread *thread)
{
uint32_t fracstep = _iscale;
@ -840,76 +539,6 @@ namespace swrenderer
}
}
// Draws four masked wall columns with clamped reverse-subtractive blending.
// Texel 0 is treated as transparent. When r_blendmethod is zero the blend
// uses the _srcblend/_destblend tables with a packed-channel borrow mask and
// RGB32k; otherwise it computes (source - destination) per palette channel,
// clamps to 0..255 and looks the result up in RGB256k.
//
// The destination pointer advances by the local, num_cores-scaled `pitch`
// (not the raw `_pitch` member), matching the scaled texture step so that
// each thread only touches its own rows.
void DrawWallRevSubClamp4PalCommand::Execute(DrawerThread *thread)
{
	uint8_t *dest = _dest;
	int count = _count;
	int bits = _fracbits;
	uint32_t *fg2rgb = _srcblend;
	uint32_t *bg2rgb = _destblend;
	uint32_t dc_wall_texturefrac[4] = { _texturefrac[0], _texturefrac[1], _texturefrac[2], _texturefrac[3] };
	uint32_t dc_wall_iscale[4] = { _iscale[0], _iscale[1], _iscale[2], _iscale[3] };

	count = thread->count_for_thread(_dest_y, count);
	if (count <= 0)
		return;

	int pitch = _pitch;
	int skipped = thread->skipped_by_thread(_dest_y);
	dest = thread->dest_for_thread(_dest_y, pitch, dest);
	for (int i = 0; i < 4; i++)
	{
		dc_wall_texturefrac[i] += dc_wall_iscale[i] * skipped;
		dc_wall_iscale[i] *= thread->num_cores;
	}
	pitch *= thread->num_cores;

	if (!r_blendmethod)
	{
		do
		{
			for (int i = 0; i < 4; ++i)
			{
				uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits];
				if (pix != 0)
				{
					uint32_t a = (bg2rgb[dest[i]] | 0x40100400) - fg2rgb[_colormap[i][pix]];
					uint32_t b = a;

					b &= 0x40100400;
					b = b - (b >> 5);
					a &= b;
					a |= 0x01f07c1f;
					dest[i] = RGB32k.All[a & (a >> 15)];
				}
				dc_wall_texturefrac[i] += dc_wall_iscale[i];
			}
			dest += pitch; // scaled pitch: step num_cores rows, not one
		} while (--count);
	}
	else
	{
		do
		{
			for (int i = 0; i < 4; ++i)
			{
				uint8_t pix = _source[i][dc_wall_texturefrac[i] >> bits];
				if (pix != 0)
				{
					uint32_t r = clamp(GPalette.BaseColors[_colormap[i][pix]].r - GPalette.BaseColors[dest[i]].r, 0, 255);
					uint32_t g = clamp(GPalette.BaseColors[_colormap[i][pix]].g - GPalette.BaseColors[dest[i]].g, 0, 255);
					uint32_t b = clamp(GPalette.BaseColors[_colormap[i][pix]].b - GPalette.BaseColors[dest[i]].b, 0, 255);
					dest[i] = RGB256k.RGB[r>>2][g>>2][b>>2];
				}
				dc_wall_texturefrac[i] += dc_wall_iscale[i];
			}
			dest += pitch; // scaled pitch: step num_cores rows, not one
		} while (--count);
	}
}
/////////////////////////////////////////////////////////////////////////
PalSkyCommand::PalSkyCommand(uint32_t solid_top, uint32_t solid_bottom) : solid_top(solid_top), solid_bottom(solid_bottom)
@ -994,146 +623,6 @@ namespace swrenderer
}
}
// Draws four single-layer sky columns at once. The rows are split into five
// bands: solid top color, fade from the top color into the texture, plain
// texture, fade from the texture into the bottom color, and solid bottom
// color. The two solid bands write all four pixels with one 32-bit store.
//
// The band limits are taken over all four columns (minimum start, maximum
// end), so a row is only solid-filled or drawn unblended when every column
// is inside that band; the fade loops clamp alpha per column.
void DrawSingleSky4PalCommand::Execute(DrawerThread *thread)
{
	uint8_t *dest = _dest;
	int count = _count;
	int pitch = _pitch;
	const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] };
	int textureheight0 = _sourceheight[0];
	const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
	int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] };
	int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] };
	uint8_t output[4];

	int start_fade = 2; // How fast it should fade out

	int solid_top_r = RPART(solid_top);
	int solid_top_g = GPART(solid_top);
	int solid_top_b = BPART(solid_top);
	int solid_bottom_r = RPART(solid_bottom);
	int solid_bottom_g = GPART(solid_bottom);
	int solid_bottom_b = BPART(solid_bottom);
	uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)];
	uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)];
	// Replicate the palette index into all four bytes so one store fills four pixels.
	solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill;
	solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill;

	// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
	int fade_length = (1 << (24 - start_fade));
	int start_fadetop_y = (-frac[0]) / fracstep[0];
	int end_fadetop_y = (fade_length - frac[0]) / fracstep[0];
	int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0];
	int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0];
	// Widen the fade bands so they cover the limits of every column.
	for (int col = 1; col < 4; col++)
	{
		start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]);
		end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]);
		start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]);
		end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]);
	}
	start_fadetop_y = clamp(start_fadetop_y, 0, count);
	end_fadetop_y = clamp(end_fadetop_y, 0, count);
	start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
	end_fadebottom_y = clamp(end_fadebottom_y, 0, count);

	int skipped = thread->skipped_by_thread(_dest_y);
	dest = thread->dest_for_thread(_dest_y, pitch, dest);
	for (int col = 0; col < 4; col++)
	{
		frac[col] += fracstep[col] * skipped;
		fracstep[col] *= thread->num_cores;
	}
	pitch *= thread->num_cores;
	int num_cores = thread->num_cores;
	int index = skipped;

	// Top solid color:
	while (index < start_fadetop_y)
	{
		*((uint32_t*)dest) = solid_top_fill;
		dest += pitch;
		for (int col = 0; col < 4; col++)
			frac[col] += fracstep[col];
		index += num_cores;
	}

	// Top fade:
	while (index < end_fadetop_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			uint8_t fg = source0[col][sample_index];

			uint32_t c = palette[fg];
			int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0);
			int inv_alpha_top = 256 - alpha_top;
			int c_red = RPART(c);
			int c_green = GPART(c);
			int c_blue = BPART(c);
			c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
			c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
			c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
			output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)];

			frac[col] += fracstep[col];
		}
		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Textured center:
	while (index < start_fadebottom_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			output[col] = source0[col][sample_index];

			frac[col] += fracstep[col];
		}

		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Fade bottom:
	while (index < end_fadebottom_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			uint8_t fg = source0[col][sample_index];

			uint32_t c = palette[fg];
			int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0);
			int inv_alpha_bottom = 256 - alpha_bottom;
			int c_red = RPART(c);
			int c_green = GPART(c);
			int c_blue = BPART(c);
			c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
			c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
			c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
			output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)];

			frac[col] += fracstep[col];
		}
		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Bottom solid color:
	while (index < count)
	{
		*((uint32_t*)dest) = solid_bottom_fill;
		dest += pitch;
		index += num_cores;
	}
}
void DrawDoubleSky1PalCommand::Execute(DrawerThread *thread)
{
uint8_t *dest = _dest;
@ -1206,167 +695,6 @@ namespace swrenderer
}
}
// Draws four double-layer sky columns at once. Each pixel samples the first
// texture; where that texel is 0 it falls back to the second texture at the
// same index, limited to the second texture's last row. The rows are split
// into five bands: solid top color, fade from the top color into the texture,
// plain texture, fade into the bottom color, and solid bottom color. The two
// solid bands write all four pixels with one 32-bit store.
//
// The band limits are taken over all four columns (minimum start, maximum
// end), so a row is only solid-filled or drawn unblended when every column
// is inside that band; the fade loops clamp alpha per column.
void DrawDoubleSky4PalCommand::Execute(DrawerThread *thread)
{
	uint8_t *dest = _dest;
	int count = _count;
	int pitch = _pitch;
	const uint8_t *source0[4] = { _source[0], _source[1], _source[2], _source[3] };
	const uint8_t *source1[4] = { _source2[0], _source2[1], _source2[2], _source2[3] };
	int textureheight0 = _sourceheight[0];
	uint32_t maxtextureheight1 = _sourceheight[1] - 1;
	const uint32_t *palette = (const uint32_t *)GPalette.BaseColors;
	int32_t frac[4] = { (int32_t)_texturefrac[0], (int32_t)_texturefrac[1], (int32_t)_texturefrac[2], (int32_t)_texturefrac[3] };
	int32_t fracstep[4] = { (int32_t)_iscale[0], (int32_t)_iscale[1], (int32_t)_iscale[2], (int32_t)_iscale[3] };
	uint8_t output[4];

	int start_fade = 2; // How fast it should fade out

	int solid_top_r = RPART(solid_top);
	int solid_top_g = GPART(solid_top);
	int solid_top_b = BPART(solid_top);
	int solid_bottom_r = RPART(solid_bottom);
	int solid_bottom_g = GPART(solid_bottom);
	int solid_bottom_b = BPART(solid_bottom);
	uint32_t solid_top_fill = RGB256k.RGB[(solid_top_r >> 2)][(solid_top_g >> 2)][(solid_top_b >> 2)];
	uint32_t solid_bottom_fill = RGB256k.RGB[(solid_bottom_r >> 2)][(solid_bottom_g >> 2)][(solid_bottom_b >> 2)];
	// Replicate the palette index into all four bytes so one store fills four pixels.
	solid_top_fill = (solid_top_fill << 24) | (solid_top_fill << 16) | (solid_top_fill << 8) | solid_top_fill;
	solid_bottom_fill = (solid_bottom_fill << 24) | (solid_bottom_fill << 16) | (solid_bottom_fill << 8) | solid_bottom_fill;

	// Find bands for top solid color, top fade, center textured, bottom fade, bottom solid color:
	int fade_length = (1 << (24 - start_fade));
	int start_fadetop_y = (-frac[0]) / fracstep[0];
	int end_fadetop_y = (fade_length - frac[0]) / fracstep[0];
	int start_fadebottom_y = ((2 << 24) - fade_length - frac[0]) / fracstep[0];
	int end_fadebottom_y = ((2 << 24) - frac[0]) / fracstep[0];
	// Widen the fade bands so they cover the limits of every column.
	for (int col = 1; col < 4; col++)
	{
		start_fadetop_y = MIN(start_fadetop_y, (-frac[col]) / fracstep[col]);
		end_fadetop_y = MAX(end_fadetop_y, (fade_length - frac[col]) / fracstep[col]);
		start_fadebottom_y = MIN(start_fadebottom_y, ((2 << 24) - fade_length - frac[col]) / fracstep[col]);
		end_fadebottom_y = MAX(end_fadebottom_y, ((2 << 24) - frac[col]) / fracstep[col]);
	}
	start_fadetop_y = clamp(start_fadetop_y, 0, count);
	end_fadetop_y = clamp(end_fadetop_y, 0, count);
	start_fadebottom_y = clamp(start_fadebottom_y, 0, count);
	end_fadebottom_y = clamp(end_fadebottom_y, 0, count);

	int skipped = thread->skipped_by_thread(_dest_y);
	dest = thread->dest_for_thread(_dest_y, pitch, dest);
	for (int col = 0; col < 4; col++)
	{
		frac[col] += fracstep[col] * skipped;
		fracstep[col] *= thread->num_cores;
	}
	pitch *= thread->num_cores;
	int num_cores = thread->num_cores;
	int index = skipped;

	// Top solid color:
	while (index < start_fadetop_y)
	{
		*((uint32_t*)dest) = solid_top_fill;
		dest += pitch;
		for (int col = 0; col < 4; col++)
			frac[col] += fracstep[col];
		index += num_cores;
	}

	// Top fade:
	while (index < end_fadetop_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			uint8_t fg = source0[col][sample_index];
			if (fg == 0)
			{
				uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
				fg = source1[col][sample_index2];
			}

			uint32_t c = palette[fg];
			int alpha_top = MAX(MIN(frac[col] >> (16 - start_fade), 256), 0);
			int inv_alpha_top = 256 - alpha_top;
			int c_red = RPART(c);
			int c_green = GPART(c);
			int c_blue = BPART(c);
			c_red = (c_red * alpha_top + solid_top_r * inv_alpha_top) >> 8;
			c_green = (c_green * alpha_top + solid_top_g * inv_alpha_top) >> 8;
			c_blue = (c_blue * alpha_top + solid_top_b * inv_alpha_top) >> 8;
			output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)];

			frac[col] += fracstep[col];
		}
		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Textured center:
	while (index < start_fadebottom_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			uint8_t fg = source0[col][sample_index];
			if (fg == 0)
			{
				uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
				fg = source1[col][sample_index2];
			}
			output[col] = fg;

			frac[col] += fracstep[col];
		}

		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Fade bottom:
	while (index < end_fadebottom_y)
	{
		for (int col = 0; col < 4; col++)
		{
			uint32_t sample_index = (((((uint32_t)frac[col]) << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
			uint8_t fg = source0[col][sample_index];
			if (fg == 0)
			{
				uint32_t sample_index2 = MIN(sample_index, maxtextureheight1);
				fg = source1[col][sample_index2];
			}

			uint32_t c = palette[fg];
			int alpha_bottom = MAX(MIN(((2 << 24) - frac[col]) >> (16 - start_fade), 256), 0);
			int inv_alpha_bottom = 256 - alpha_bottom;
			int c_red = RPART(c);
			int c_green = GPART(c);
			int c_blue = BPART(c);
			c_red = (c_red * alpha_bottom + solid_bottom_r * inv_alpha_bottom) >> 8;
			c_green = (c_green * alpha_bottom + solid_bottom_g * inv_alpha_bottom) >> 8;
			c_blue = (c_blue * alpha_bottom + solid_bottom_b * inv_alpha_bottom) >> 8;
			output[col] = RGB256k.RGB[(c_red >> 2)][(c_green >> 2)][(c_blue >> 2)];

			frac[col] += fracstep[col];
		}
		*((uint32_t*)dest) = *((uint32_t*)output);
		dest += pitch;
		index += num_cores;
	}

	// Bottom solid color:
	while (index < count)
	{
		*((uint32_t*)dest) = solid_bottom_fill;
		dest += pitch;
		index += num_cores;
	}
}
/////////////////////////////////////////////////////////////////////////
PalColumnCommand::PalColumnCommand()

View file

@ -32,37 +32,12 @@ namespace swrenderer
float _step_viewpos_z;
};
// Base class for the paletted wall drawer commands that render four columns
// per command. The constructor copies every field below from the matching
// drawerargs global (dc_dest, dc_count, dc_pitch, dc_wall_fracbits,
// dc_wall_colormap[], dc_wall_source[], dc_wall_iscale[],
// dc_wall_texturefrac[], dc_srcblend, dc_destblend).
class PalWall4Command : public DrawerCommand
{
public:
PalWall4Command();
// NOTE(review): the label carries no "4"; confirm whether it should be
// distinguishable from the 1-column wall command's label.
FString DebugInfo() override { return "PalWallCommand"; }
protected:
// Destination pointer; the Execute overrides write dest[0..3] per row.
uint8_t *_dest;
// Row count passed to DrawerThread::count_for_thread.
int _count;
// Byte offset added to the destination pointer per row.
int _pitch;
// Right shift applied to a texture coordinate to get the source index.
int _fracbits;
// Per-column lookup table applied to each source texel.
uint8_t *_colormap[4];
// Per-column source texel data.
const uint8_t *_source[4];
// Per-column texture coordinate step per row.
uint32_t _iscale[4];
// Per-column starting texture coordinate.
uint32_t _texturefrac[4];
// Blend tables indexed by the source color (_srcblend) and by the
// destination pixel (_destblend) in the blending drawers.
uint32_t *_srcblend;
uint32_t *_destblend;
};
// Concrete paletted wall drawer commands. Each class only overrides
// Execute(); the *1* variants derive from PalWall1Command and the *4*
// variants derive from PalWall4Command.
class DrawWall1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWall4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallMasked1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallMasked4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAdd1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAdd4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAddClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallAddClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallRevSubClamp1PalCommand : public PalWall1Command { public: void Execute(DrawerThread *thread) override; };
class DrawWallRevSubClamp4PalCommand : public PalWall4Command { public: void Execute(DrawerThread *thread) override; };
class PalSkyCommand : public DrawerCommand
{
@ -85,9 +60,7 @@ namespace swrenderer
};
// Concrete paletted sky drawer commands. Each class inherits the
// PalSkyCommand constructors and only overrides Execute().
class DrawSingleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawSingleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawDoubleSky1PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class DrawDoubleSky4PalCommand : public PalSkyCommand { public: using PalSkyCommand::PalSkyCommand; void Execute(DrawerThread *thread) override; };
class PalColumnCommand : public DrawerCommand
{

View file

@ -181,72 +181,6 @@ namespace swrenderer
/////////////////////////////////////////////////////////////////////////////
// Builds a WorkerThreadData holding the thread's core, num_cores,
// pass_start_y and pass_end_y values.
WorkerThreadData DrawWall4LLVMCommand::ThreadData(DrawerThread *thread)
{
	WorkerThreadData data;

	data.pass_start_y = thread->pass_start_y;
	data.pass_end_y = thread->pass_end_y;
	data.num_cores = thread->num_cores;
	data.core = thread->core;

	return data;
}
// Fills `args` from the current drawerargs state for a four-column wall
// draw: destination, row count and pitch, shade constants, per-column
// texture inputs and light, blend alphas and flags. Dynamic-light fields are
// cleared. Finishes by calling DetectRangeError on the destination range.
DrawWall4LLVMCommand::DrawWall4LLVMCommand()
{
	using namespace drawerargs;

	// Destination and extent.
	args.dest = (uint32_t*)dc_dest;
	args.dest_y = _dest_y;
	args.count = dc_count;
	args.pitch = dc_pitch;

	// Per-column texture inputs and light multiplier.
	for (int col = 0; col != 4; ++col)
	{
		args.texturefrac[col] = dc_wall_texturefrac[col];
		args.iscale[col] = dc_wall_iscale[col];
		args.texturefracx[col] = dc_wall_texturefracx[col];
		args.textureheight[col] = dc_wall_sourceheight[col];
		args.source[col] = (const uint32_t *)dc_wall_source[col];
		args.source2[col] = (const uint32_t *)dc_wall_source2[col];
		args.light[col] = LightBgra::calc_light_multiplier(dc_wall_light[col]);
	}

	// Shade constants.
	const auto &shade = dc_shade_constants;
	args.light_red = shade.light_red;
	args.light_green = shade.light_green;
	args.light_blue = shade.light_blue;
	args.light_alpha = shade.light_alpha;
	args.fade_red = shade.fade_red;
	args.fade_green = shade.fade_green;
	args.fade_blue = shade.fade_blue;
	args.fade_alpha = shade.fade_alpha;
	args.desaturate = shade.desaturate;

	// Blend alphas, shifted down by (FRACBITS - 8) bits.
	args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
	args.destalpha = dc_destalpha >> (FRACBITS - 8);

	// Flags: simple shading, and nearest filtering when column 0 has no
	// second source.
	args.flags = 0;
	if (shade.simple_shade)
		args.flags |= DrawWallArgs::simple_shade;
	if (args.source2[0] == nullptr)
		args.flags |= DrawWallArgs::nearest_filter;

	// No dynamic lights for this command.
	args.z = 0.0f;
	args.step_z = 0.0f;
	args.dynlights = nullptr;
	args.num_dynlights = 0;

	DetectRangeError(args.dest, args.dest_y, args.count);
}
// Runs the vlinec4 drawer with the captured arguments and the thread data
// derived from the given drawer thread.
void DrawWall4LLVMCommand::Execute(DrawerThread *thread)
{
	WorkerThreadData threadData = ThreadData(thread);
	auto *drawers = Drawers::Instance();
	drawers->vlinec4(&args, &threadData);
}
// Returns the command name on its own line followed by the textual dump of
// the captured arguments.
FString DrawWall4LLVMCommand::DebugInfo()
{
	const FString argsText = args.ToString();
	return "DrawWall4\n" + argsText;
}
/////////////////////////////////////////////////////////////////////////////
WorkerThreadData DrawWall1LLVMCommand::ThreadData(DrawerThread *thread)
{
WorkerThreadData d;

View file

@ -117,20 +117,6 @@ namespace swrenderer
void Execute(DrawerThread *thread) override;
};
// Drawer command that captures wall drawing arguments for four columns at
// construction time and passes them to a drawer in Execute().
class DrawWall4LLVMCommand : public DrawerCommand
{
public:
	// Snapshots the current drawer state into args.
	DrawWall4LLVMCommand();

	void Execute(DrawerThread *thread) override;
	FString DebugInfo() override;

protected:
	// Builds the per-thread data handed to the drawer alongside args.
	WorkerThreadData ThreadData(DrawerThread *thread);

	// Captured arguments; protected so derived commands can reuse them.
	DrawWallArgs args;
};
class DrawWall1LLVMCommand : public DrawerCommand
{
protected:
@ -171,11 +157,6 @@ namespace swrenderer
FString DebugInfo() override;
};
// Wall command variants for the masked and translucent blend modes. The macro
// arguments are (command name, drawer name, base command class).
// NOTE(review): the macro definition is not shown here - presumably it declares
// a <name>LLVMCommand class deriving from the base class whose Execute() calls
// the named drawer on Drawers; confirm against the macro.
DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand);
@ -197,9 +178,7 @@ namespace swrenderer
// Column fill variants (based on DrawColumnLLVMCommand) followed by the sky
// variants (based on DrawSkyLLVMCommand). The macro arguments are
// (command name, drawer name, base command class).
// NOTE(review): the macro definition is not shown here - presumably the second
// argument names the Drawers function the generated command calls; confirm.
DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawSingleSky1, DrawSky1, DrawSkyLLVMCommand);
DECLARE_DRAW_COMMAND(DrawSingleSky4, DrawSky4, DrawSkyLLVMCommand);
DECLARE_DRAW_COMMAND(DrawDoubleSky1, DrawDoubleSky1, DrawSkyLLVMCommand);
DECLARE_DRAW_COMMAND(DrawDoubleSky4, DrawDoubleSky4, DrawSkyLLVMCommand);
class DrawFuzzColumnRGBACommand : public DrawerCommand
{

View file

@ -78,9 +78,7 @@ extern "C"
void DrawSpanAddClamp_SSE2(const DrawSpanArgs *);
void DrawSpanMaskedAddClamp_SSE2(const DrawSpanArgs *);
void vlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void vlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void mvlinec1_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void mvlinec4_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_add_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_add_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline1_addclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
@ -90,9 +88,7 @@ extern "C"
void tmvline1_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void tmvline4_revsubclamp_SSE2(const DrawWallArgs *, const WorkerThreadData *);
void DrawSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void DrawSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void DrawDoubleSky1_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void DrawDoubleSky4_SSE2(const DrawSkyArgs *, const WorkerThreadData *);
void TriDraw8_0_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_1_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
void TriDraw8_2_SSE2(const TriDrawTriangleArgs *, WorkerThreadData *);
@ -182,21 +178,13 @@ Drawers::Drawers()
DrawSpanAddClamp = DrawSpanAddClamp_SSE2;
DrawSpanMaskedAddClamp = DrawSpanMaskedAddClamp_SSE2;
vlinec1 = vlinec1_SSE2;
vlinec4 = vlinec4_SSE2;
mvlinec1 = mvlinec1_SSE2;
mvlinec4 = mvlinec4_SSE2;
tmvline1_add = tmvline1_add_SSE2;
tmvline4_add = tmvline4_add_SSE2;
tmvline1_addclamp = tmvline1_addclamp_SSE2;
tmvline4_addclamp = tmvline4_addclamp_SSE2;
tmvline1_subclamp = tmvline1_subclamp_SSE2;
tmvline4_subclamp = tmvline4_subclamp_SSE2;
tmvline1_revsubclamp = tmvline1_revsubclamp_SSE2;
tmvline4_revsubclamp = tmvline4_revsubclamp_SSE2;
DrawSky1 = DrawSky1_SSE2;
DrawSky4 = DrawSky4_SSE2;
DrawDoubleSky1 = DrawDoubleSky1_SSE2;
DrawDoubleSky4 = DrawDoubleSky4_SSE2;
TriDraw8.push_back(TriDraw8_0_SSE2);
TriDraw8.push_back(TriDraw8_1_SSE2);
TriDraw8.push_back(TriDraw8_2_SSE2);

View file

@ -317,22 +317,14 @@ public:
void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr;
void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr;
void(*DrawSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
void(*DrawSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
void(*DrawDoubleSky1)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
void(*DrawDoubleSky4)(const DrawSkyArgs *, const WorkerThreadData *) = nullptr;
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDraw8;
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDraw32;

View file

@ -931,16 +931,10 @@ static void R_DrawSkyColumnStripe(int start_x, int y1, int y2, int columns, doub
uint32_t solid_top = frontskytex->GetSkyCapColor(false);
uint32_t solid_bottom = frontskytex->GetSkyCapColor(true);
if (columns == 4)
if (!backskytex)
R_DrawSingleSkyCol4(solid_top, solid_bottom);
R_DrawSingleSkyColumn(solid_top, solid_bottom);
else
R_DrawDoubleSkyCol4(solid_top, solid_bottom);
else
if (!backskytex)
R_DrawSingleSkyCol1(solid_top, solid_bottom);
else
R_DrawDoubleSkyCol1(solid_top, solid_bottom);
R_DrawDoubleSkyColumn(solid_top, solid_bottom);
}
static void R_DrawSkyColumn(int start_x, int y1, int y2, int columns)
@ -970,76 +964,7 @@ static void R_DrawCapSky(visplane_t *pl)
short *uwal = (short *)pl->top;
short *dwal = (short *)pl->bottom;
// Calculate where 4 column alignment begins and ends:
int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2);
int aligned_x2 = clamp(x2 / 4 * 4, x1, x2);
// First unaligned columns:
for (int x = x1; x < aligned_x1; x++)
{
int y1 = uwal[x];
int y2 = dwal[x];
if (y2 <= y1)
continue;
R_DrawSkyColumn(x, y1, y2, 1);
}
// The aligned columns
for (int x = aligned_x1; x < aligned_x2; x += 4)
{
// Find y1, y2, light and uv values for four columns:
int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] };
int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] };
// Figure out where we vertically can start and stop drawing 4 columns in one go
int middle_y1 = y1[0];
int middle_y2 = y2[0];
for (int i = 1; i < 4; i++)
{
middle_y1 = MAX(y1[i], middle_y1);
middle_y2 = MIN(y2[i], middle_y2);
}
// If we got an empty column in our set we cannot draw 4 columns in one go:
bool empty_column_in_set = false;
for (int i = 0; i < 4; i++)
{
if (y2[i] <= y1[i])
empty_column_in_set = true;
}
if (empty_column_in_set || middle_y2 <= middle_y1)
{
for (int i = 0; i < 4; i++)
{
if (y2[i] <= y1[i])
continue;
R_DrawSkyColumn(x + i, y1[i], y2[i], 1);
}
continue;
}
// Draw the first rows where not all 4 columns are active
for (int i = 0; i < 4; i++)
{
if (y1[i] < middle_y1)
R_DrawSkyColumn(x + i, y1[i], middle_y1, 1);
}
// Draw the area where all 4 columns are active
R_DrawSkyColumn(x, middle_y1, middle_y2, 4);
// Draw the last rows where not all 4 columns are active
for (int i = 0; i < 4; i++)
{
if (middle_y2 < y2[i])
R_DrawSkyColumn(x + i, middle_y2, y2[i], 1);
}
}
// The last unaligned columns:
for (int x = aligned_x2; x < x2; x++)
for (int x = x1; x < x2; x++)
{
int y1 = uwal[x];
int y2 = dwal[x];

View file

@ -670,102 +670,11 @@ static void Draw1Column(int x, int y1, int y2, WallSampler &sampler, void(*draw1
}
}
// Draw four columns with support for non-power-of-two ranges
//
// Draws rows [y1, y2) of the four adjacent columns starting at x. The per-column
// texture state is copied from sampler[0..3] into the dc_wall_* globals, dc_dest
// and dc_count are set, and draw4columns() is invoked. Each sampler's uv_pos is
// advanced past the rows drawn, so a later call on the same sampler continues
// from where this one stopped.
static void Draw4Columns(int x, int y1, int y2, WallSampler *sampler, void(*draw4columns)())
{
if (r_swtruecolor)
{
// True color path: a single draw call covers the whole row range. This is the
// only path that also passes the texture height (dc_wall_sourceheight).
int count = y2 - y1;
for (int i = 0; i < 4; i++)
{
dc_wall_source[i] = sampler[i].source;
dc_wall_source2[i] = sampler[i].source2;
dc_wall_texturefracx[i] = sampler[i].texturefracx;
dc_wall_sourceheight[i] = sampler[i].height;
dc_wall_texturefrac[i] = sampler[i].uv_pos;
dc_wall_iscale[i] = sampler[i].uv_step;
// Advance the position in 64 bits, then truncate back to 32 bits.
uint64_t step64 = sampler[i].uv_step;
uint64_t pos64 = sampler[i].uv_pos;
sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count);
}
// NOTE(review): the * 4 is presumably bytes per true color pixel - confirm.
dc_dest = (ylookup[y1] + x) * 4 + dc_destorg;
dc_count = count;
draw4columns();
}
else
{
// NOTE(review): only sampler[0] is tested here; the other three samplers are
// assumed to be in the same mode. The wrap path below divides by every
// sampler's uv_step, so a zero step in samplers 1..3 would divide by zero.
if (sampler[0].uv_max == 0 || sampler[0].uv_step == 0) // power of two, no wrap handling needed
{
int count = y2 - y1;
for (int i = 0; i < 4; i++)
{
dc_wall_source[i] = sampler[i].source;
dc_wall_source2[i] = sampler[i].source2;
dc_wall_texturefracx[i] = sampler[i].texturefracx;
dc_wall_texturefrac[i] = sampler[i].uv_pos;
dc_wall_iscale[i] = sampler[i].uv_step;
// Advance the position in 64 bits, then truncate back to 32 bits.
uint64_t step64 = sampler[i].uv_step;
uint64_t pos64 = sampler[i].uv_pos;
sampler[i].uv_pos = (uint32_t)(pos64 + step64 * count);
}
dc_dest = (ylookup[y1] + x) + dc_destorg;
dc_count = count;
draw4columns();
}
else
{
// Wrap path: the range is drawn in segments, each ending where the first of
// the four columns reaches uv_max.
// NOTE(review): dc_dest is assigned once here and not changed inside the loop
// below - confirm that draw4columns() advances it, otherwise every segment
// would start at the same row.
dc_dest = (ylookup[y1] + x) + dc_destorg;
for (int i = 0; i < 4; i++)
{
dc_wall_source[i] = sampler[i].source;
dc_wall_source2[i] = sampler[i].source2;
dc_wall_texturefracx[i] = sampler[i].texturefracx;
}
// Rows still to be drawn.
uint32_t left = y2 - y1;
while (left > 0)
{
// Find which column wraps first
uint32_t count = left;
for (int i = 0; i < 4; i++)
{
// Number of steps until uv_pos reaches uv_max, rounded up.
uint32_t available = sampler[i].uv_max - sampler[i].uv_pos;
uint32_t next_uv_wrap = available / sampler[i].uv_step;
if (available % sampler[i].uv_step != 0)
next_uv_wrap++;
count = MIN(next_uv_wrap, count);
}
// Draw until that column wraps
for (int i = 0; i < 4; i++)
{
dc_wall_texturefrac[i] = sampler[i].uv_pos;
dc_wall_iscale[i] = sampler[i].uv_step;
}
dc_count = count;
draw4columns();
// Wrap the uv position
for (int i = 0; i < 4; i++)
{
sampler[i].uv_pos += sampler[i].uv_step * count;
if (sampler[i].uv_pos >= sampler[i].uv_max)
sampler[i].uv_pos -= sampler[i].uv_max;
}
left -= count;
}
}
}
}
typedef void(*DrawColumnFuncPtr)();
static void ProcessWallWorker(
int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat,
const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr draw1column, DrawColumnFuncPtr draw4columns)
const BYTE *(*getcol)(FTexture *tex, int x), DrawColumnFuncPtr drawcolumn)
{
if (rw_pic->UseType == FTexture::TEX_Null)
return;
@ -805,8 +714,6 @@ static void ProcessWallWorker(
double xmagnitude = 1.0;
if (r_dynlights)
{
for (int x = x1; x < x2; x++, light += rw_lightstep)
{
int y1 = uwal[x];
@ -820,143 +727,7 @@ static void ProcessWallWorker(
if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x]));
WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol);
Draw1Column(x, y1, y2, sampler, draw1column);
}
NetUpdate();
return;
}
// Calculate where 4 column alignment begins and ends:
int aligned_x1 = clamp((x1 + 3) / 4 * 4, x1, x2);
int aligned_x2 = clamp(x2 / 4 * 4, x1, x2);
// First unaligned columns:
for (int x = x1; x < aligned_x1; x++, light += rw_lightstep)
{
int y1 = uwal[x];
int y2 = dwal[x];
if (y2 <= y1)
continue;
if (!fixed)
R_SetColorMapLight(basecolormap, light, wallshade);
if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x]));
WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol);
Draw1Column(x, y1, y2, sampler, draw1column);
}
// The aligned columns
for (int x = aligned_x1; x < aligned_x2; x += 4)
{
// Find y1, y2, light and uv values for four columns:
int y1[4] = { uwal[x], uwal[x + 1], uwal[x + 2], uwal[x + 3] };
int y2[4] = { dwal[x], dwal[x + 1], dwal[x + 2], dwal[x + 3] };
float lights[4];
for (int i = 0; i < 4; i++)
{
lights[i] = light;
light += rw_lightstep;
}
WallSampler sampler[4];
for (int i = 0; i < 4; i++)
{
if (x + i + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + i + 1]) - FIXED2DBL(lwal[x + i]));
sampler[i] = WallSampler(y1[i], swal[x + i], yrepeat, lwal[x + i] + xoffset, xmagnitude, rw_pic, getcol);
}
// Figure out where we vertically can start and stop drawing 4 columns in one go
int middle_y1 = y1[0];
int middle_y2 = y2[0];
for (int i = 1; i < 4; i++)
{
middle_y1 = MAX(y1[i], middle_y1);
middle_y2 = MIN(y2[i], middle_y2);
}
// If we got an empty column in our set we cannot draw 4 columns in one go:
bool empty_column_in_set = false;
int bilinear_count = 0;
for (int i = 0; i < 4; i++)
{
if (y2[i] <= y1[i])
empty_column_in_set = true;
if (sampler[i].source2)
bilinear_count++;
}
if (empty_column_in_set || middle_y2 <= middle_y1 || (bilinear_count > 0 && bilinear_count < 4))
{
for (int i = 0; i < 4; i++)
{
if (y2[i] <= y1[i])
continue;
if (!fixed)
R_SetColorMapLight(basecolormap, lights[i], wallshade);
Draw1Column(x + i, y1[i], y2[i], sampler[i], draw1column);
}
continue;
}
// Draw the first rows where not all 4 columns are active
for (int i = 0; i < 4; i++)
{
if (!fixed)
R_SetColorMapLight(basecolormap, lights[i], wallshade);
if (y1[i] < middle_y1)
Draw1Column(x + i, y1[i], middle_y1, sampler[i], draw1column);
}
// Draw the area where all 4 columns are active
if (!fixed)
{
for (int i = 0; i < 4; i++)
{
if (r_swtruecolor)
{
dc_wall_colormap[i] = basecolormap->Maps;
dc_wall_light[i] = LIGHTSCALE(lights[i], wallshade);
}
else
{
dc_wall_colormap[i] = basecolormap->Maps + (GETPALOOKUP(lights[i], wallshade) << COLORMAPSHIFT);
dc_wall_light[i] = 0;
}
}
}
Draw4Columns(x, middle_y1, middle_y2, sampler, draw4columns);
// Draw the last rows where not all 4 columns are active
for (int i = 0; i < 4; i++)
{
if (!fixed)
R_SetColorMapLight(basecolormap, lights[i], wallshade);
if (middle_y2 < y2[i])
Draw1Column(x + i, middle_y2, y2[i], sampler[i], draw1column);
}
}
// The last unaligned columns:
for (int x = aligned_x2; x < x2; x++, light += rw_lightstep)
{
int y1 = uwal[x];
int y2 = dwal[x];
if (y2 <= y1)
continue;
if (!fixed)
R_SetColorMapLight(basecolormap, light, wallshade);
if (x + 1 < x2) xmagnitude = fabs(FIXED2DBL(lwal[x + 1]) - FIXED2DBL(lwal[x]));
WallSampler sampler(y1, swal[x], yrepeat, lwal[x] + xoffset, xmagnitude, rw_pic, getcol);
Draw1Column(x, y1, y2, sampler, draw1column);
Draw1Column(x, y1, y2, sampler, drawcolumn);
}
NetUpdate();
@ -964,7 +735,7 @@ static void ProcessWallWorker(
static void ProcessNormalWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn)
{
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallCol1, R_DrawWallCol4);
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallColumn);
}
static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn)
@ -975,22 +746,21 @@ static void ProcessMaskedWall(int x1, int x2, short *uwal, short *dwal, float *s
}
else
{
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedCol1, R_DrawWallMaskedCol4);
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, R_DrawWallMaskedColumn);
}
}
static void ProcessTranslucentWall(int x1, int x2, short *uwal, short *dwal, float *swal, fixed_t *lwal, double yrepeat, const BYTE *(*getcol)(FTexture *tex, int x) = R_GetColumn)
{
static void(*drawcol1)();
static void(*drawcol4)();
if (!R_GetTransMaskDrawers(&drawcol1, &drawcol4))
void(*drawcol1)();
if (!R_GetTransMaskDrawers(&drawcol1))
{
// The current translucency is unsupported, so draw with regular ProcessMaskedWall instead.
ProcessMaskedWall(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol);
}
else
{
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1, drawcol4);
ProcessWallWorker(x1, x2, uwal, dwal, swal, lwal, yrepeat, getcol, drawcol1);
}
}

View file

@ -32,28 +32,16 @@
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data)
void DrawSkyCodegen::Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data)
{
dest = args[0][0].load(true);
source0[0] = args[0][1].load(true);
source0[1] = args[0][2].load(true);
source0[2] = args[0][3].load(true);
source0[3] = args[0][4].load(true);
source1[0] = args[0][5].load(true);
source1[1] = args[0][6].load(true);
source1[2] = args[0][7].load(true);
source1[3] = args[0][8].load(true);
source0 = args[0][1].load(true);
source1 = args[0][5].load(true);
pitch = args[0][9].load(true);
count = args[0][10].load(true);
dest_y = args[0][11].load(true);
texturefrac[0] = args[0][12].load(true);
texturefrac[1] = args[0][13].load(true);
texturefrac[2] = args[0][14].load(true);
texturefrac[3] = args[0][15].load(true);
iscale[0] = args[0][16].load(true);
iscale[1] = args[0][17].load(true);
iscale[2] = args[0][18].load(true);
iscale[3] = args[0][19].load(true);
texturefrac = args[0][12].load(true);
iscale = args[0][16].load(true);
textureheight0 = args[0][20].load(true);
SSAInt textureheight1 = args[0][21].load(true);
maxtextureheight1 = textureheight1 - 1;
@ -70,66 +58,45 @@ void DrawSkyCodegen::Generate(DrawSkyVariant variant, bool fourColumns, SSAValue
pitch = pitch * thread.num_cores;
int numColumns = fourColumns ? 4 : 1;
for (int i = 0; i < numColumns; i++)
{
stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread));
fracstep[i] = iscale[i] * thread.num_cores;
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
fracstep = iscale * thread.num_cores;
Loop(variant);
}
Loop(variant, fourColumns);
}
void DrawSkyCodegen::Loop(DrawSkyVariant variant, bool fourColumns)
void DrawSkyCodegen::Loop(DrawSkyVariant variant)
{
int numColumns = fourColumns ? 4 : 1;
stack_index.store(SSAInt(0));
{
SSAForBlock loop;
SSAInt index = stack_index.load();
loop.loop_block(index < count);
SSAInt frac[4];
for (int i = 0; i < numColumns; i++)
frac[i] = stack_frac[i].load();
SSAInt frac = stack_frac.load();
SSAInt offset = index * pitch * 4;
if (fourColumns)
{
SSAVec4i colors[4];
for (int i = 0; i < 4; i++)
colors[i] = FadeOut(frac[i], Sample(frac[i], i, variant));
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
dest[offset].store_unaligned_vec16ub(color);
}
else
{
SSAVec4i color = FadeOut(frac[0], Sample(frac[0], 0, variant));
SSAVec4i color = FadeOut(frac, Sample(frac, variant));
dest[offset].store_vec4ub(color);
}
stack_index.store(index.add(SSAInt(1), true, true));
for (int i = 0; i < numColumns; i++)
stack_frac[i].store(frac[i] + fracstep[i]);
stack_frac.store(frac + fracstep);
loop.end_block();
}
}
SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, int index, DrawSkyVariant variant)
SSAVec4i DrawSkyCodegen::Sample(SSAInt frac, DrawSkyVariant variant)
{
SSAInt sample_index = (((frac << 8) >> FRACBITS) * textureheight0) >> FRACBITS;
if (variant == DrawSkyVariant::Single)
{
return source0[index][sample_index * 4].load_vec4ub(false);
return source0[sample_index * 4].load_vec4ub(false);
}
else
{
SSAInt sample_index2 = SSAInt::MIN(sample_index, maxtextureheight1);
SSAVec4i color0 = source0[index][sample_index * 4].load_vec4ub(false);
SSAVec4i color1 = source1[index][sample_index2 * 4].load_vec4ub(false);
SSAVec4i color0 = source0[sample_index * 4].load_vec4ub(false);
SSAVec4i color1 = source1[sample_index2 * 4].load_vec4ub(false);
return blend_alpha_blend(color0, color1);
}
}

View file

@ -33,28 +33,28 @@ enum class DrawSkyVariant
class DrawSkyCodegen : public DrawerCodegen
{
public:
void Generate(DrawSkyVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data);
void Generate(DrawSkyVariant variant, SSAValue args, SSAValue thread_data);
private:
void Loop(DrawSkyVariant variant, bool fourColumns);
SSAVec4i Sample(SSAInt frac, int index, DrawSkyVariant variant);
void Loop(DrawSkyVariant variant);
SSAVec4i Sample(SSAInt frac, DrawSkyVariant variant);
SSAVec4i FadeOut(SSAInt frac, SSAVec4i color);
SSAStack<SSAInt> stack_index, stack_frac[4];
SSAStack<SSAInt> stack_index, stack_frac;
SSAUBytePtr dest;
SSAUBytePtr source0[4];
SSAUBytePtr source1[4];
SSAUBytePtr source0;
SSAUBytePtr source1;
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt texturefrac[4];
SSAInt iscale[4];
SSAInt texturefrac;
SSAInt iscale;
SSAInt textureheight0;
SSAInt maxtextureheight1;
SSAVec4i top_color;
SSAVec4i bottom_color;
SSAWorkerThread thread;
SSAInt fracstep[4];
SSAInt fracstep;
};

View file

@ -32,40 +32,19 @@
#include "ssa/ssa_struct_type.h"
#include "ssa/ssa_value.h"
void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data)
void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data)
{
dest = args[0][0].load(true);
source[0] = args[0][1].load(true);
source[1] = args[0][2].load(true);
source[2] = args[0][3].load(true);
source[3] = args[0][4].load(true);
source2[0] = args[0][5].load(true);
source2[1] = args[0][6].load(true);
source2[2] = args[0][7].load(true);
source2[3] = args[0][8].load(true);
source = args[0][1].load(true);
source2 = args[0][5].load(true);
pitch = args[0][9].load(true);
count = args[0][10].load(true);
dest_y = args[0][11].load(true);
texturefrac[0] = args[0][12].load(true);
texturefrac[1] = args[0][13].load(true);
texturefrac[2] = args[0][14].load(true);
texturefrac[3] = args[0][15].load(true);
texturefracx[0] = args[0][16].load(true);
texturefracx[1] = args[0][17].load(true);
texturefracx[2] = args[0][18].load(true);
texturefracx[3] = args[0][19].load(true);
iscale[0] = args[0][20].load(true);
iscale[1] = args[0][21].load(true);
iscale[2] = args[0][22].load(true);
iscale[3] = args[0][23].load(true);
textureheight[0] = args[0][24].load(true);
textureheight[1] = args[0][25].load(true);
textureheight[2] = args[0][26].load(true);
textureheight[3] = args[0][27].load(true);
light[0] = args[0][28].load(true);
light[1] = args[0][29].load(true);
light[2] = args[0][30].load(true);
light[3] = args[0][31].load(true);
texturefrac = args[0][12].load(true);
texturefracx = args[0][16].load(true);
iscale = args[0][20].load(true);
textureheight = args[0][24].load(true);
light = args[0][28].load(true);
srcalpha = args[0][32].load(true);
destalpha = args[0][33].load(true);
SSAShort light_alpha = args[0][34].load(true);
@ -99,42 +78,34 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal
pitch = pitch * thread.num_cores;
int numColumns = fourColumns ? 4 : 1;
for (int i = 0; i < numColumns; i++)
{
stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread));
fracstep[i] = iscale[i] * thread.num_cores;
one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1;
}
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
fracstep = iscale * thread.num_cores;
one = ((0x80000000 + textureheight - 1) / textureheight) * 2 + 1;
start_z = start_z + step_z * SSAFloat(skipped_by_thread(dest_y, thread));
step_z = step_z * SSAFloat(thread.num_cores);
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(variant, fourColumns, true);
LoopShade(variant, true);
branch.else_block();
LoopShade(variant, fourColumns, false);
LoopShade(variant, false);
branch.end_block();
}
void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade)
void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool isSimpleShade)
{
SSAIfBlock branch;
branch.if_block(is_nearest_filter);
Loop(variant, fourColumns, isSimpleShade, true);
Loop(variant, isSimpleShade, true);
branch.else_block();
int numColumns = fourColumns ? 4 : 1;
for (int i = 0; i < numColumns; i++)
stack_frac[i].store(stack_frac[i].load() - (one[i] / 2));
Loop(variant, fourColumns, isSimpleShade, false);
stack_frac.store(stack_frac.load() - (one / 2));
Loop(variant, isSimpleShade, false);
branch.end_block();
}
void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter)
void DrawWallCodegen::Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter)
{
int numColumns = fourColumns ? 4 : 1;
stack_index.store(SSAInt(0));
stack_z.store(start_z);
{
@ -143,57 +114,30 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim
z = stack_z.load();
loop.loop_block(index < count);
SSAInt frac[4];
for (int i = 0; i < numColumns; i++)
frac[i] = stack_frac[i].load();
SSAInt frac = stack_frac.load();
SSAInt offset = index * pitch * 4;
if (fourColumns)
{
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(false);
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
{
SSAVec4i::extendlo(bg0),
SSAVec4i::extendhi(bg0),
SSAVec4i::extendlo(bg1),
SSAVec4i::extendhi(bg1)
};
SSAVec4i colors[4];
for (int i = 0; i < 4; i++)
colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant);
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
dest[offset].store_unaligned_vec16ub(color);
}
else
{
SSAVec4i bgcolor = dest[offset].load_vec4ub(false);
SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant);
SSAVec4i color = Blend(Shade(Sample(frac, isNearestFilter), isSimpleShade), bgcolor, variant);
dest[offset].store_vec4ub(color);
}
stack_z.store(z + step_z);
stack_index.store(index.add(SSAInt(1), true, true));
for (int i = 0; i < numColumns; i++)
stack_frac[i].store(frac[i] + fracstep[i]);
stack_frac.store(frac + fracstep);
loop.end_block();
}
}
SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter)
SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter)
{
if (isNearestFilter)
{
SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS;
return source[index][sample_index * 4].load_vec4ub(false);
SSAInt sample_index = ((frac >> FRACBITS) * textureheight) >> FRACBITS;
return source[sample_index * 4].load_vec4ub(false);
}
else
{
return SampleLinear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]);
return SampleLinear(source, source2, texturefracx, frac, one, textureheight);
}
}
@ -217,13 +161,13 @@ SSAVec4i DrawWallCodegen::SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAIn
return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
SSAVec4i c;
if (isSimpleShade)
c = shade_bgra_simple(fg, light[index]);
c = shade_bgra_simple(fg, light);
else
c = shade_bgra_advanced(fg, light[index], shade_constants);
c = shade_bgra_advanced(fg, light, shade_constants);
stack_lit_color.store(c);
stack_light_index.store(SSAInt(0));

View file

@ -37,31 +37,31 @@ enum class DrawWallVariant
class DrawWallCodegen : public DrawerCodegen
{
public:
void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data);
void Generate(DrawWallVariant variant, SSAValue args, SSAValue thread_data);
private:
void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade);
void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter);
SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter);
void LoopShade(DrawWallVariant variant, bool isSimpleShade);
void Loop(DrawWallVariant variant, bool isSimpleShade, bool isNearestFilter);
SSAVec4i Sample(SSAInt frac, bool isNearestFilter);
SSAVec4i SampleLinear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade);
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant);
SSAStack<SSAInt> stack_index, stack_frac[4], stack_light_index;
SSAStack<SSAInt> stack_index, stack_frac, stack_light_index;
SSAStack<SSAVec4i> stack_lit_color;
SSAStack<SSAFloat> stack_z;
SSAUBytePtr dest;
SSAUBytePtr source[4];
SSAUBytePtr source2[4];
SSAUBytePtr source;
SSAUBytePtr source2;
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt texturefrac[4];
SSAInt texturefracx[4];
SSAInt iscale[4];
SSAInt textureheight[4];
SSAInt light[4];
SSAInt texturefrac;
SSAInt texturefracx;
SSAInt iscale;
SSAInt textureheight;
SSAInt light;
SSAInt srcalpha;
SSAInt destalpha;
SSABool is_simple_shade;
@ -69,8 +69,8 @@ private:
SSAShadeConstants shade_constants;
SSAWorkerThread thread;
SSAInt fracstep[4];
SSAInt one[4];
SSAInt fracstep;
SSAInt one;
SSAFloat start_z, step_z;

View file

@ -51,22 +51,14 @@ LLVMDrawers::LLVMDrawers(const std::string &triple, const std::string &cpuName,
CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent);
CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp);
CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp);
CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1);
CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4);
CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1);
CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4);
CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1);
CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4);
CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1);
CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4);
CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1);
CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4);
CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1);
CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4);
CodegenDrawSky("DrawSky1", DrawSkyVariant::Single, 1);
CodegenDrawSky("DrawSky4", DrawSkyVariant::Single, 4);
CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double, 1);
CodegenDrawSky("DrawDoubleSky4", DrawSkyVariant::Double, 4);
CodegenDrawWall("vlinec1", DrawWallVariant::Opaque);
CodegenDrawWall("mvlinec1", DrawWallVariant::Masked);
CodegenDrawWall("tmvline1_add", DrawWallVariant::Add);
CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp);
CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp);
CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp);
CodegenDrawSky("DrawSky1", DrawSkyVariant::Single);
CodegenDrawSky("DrawDoubleSky1", DrawSkyVariant::Double);
for (int i = 0; i < NumTriBlendModes(); i++)
{
CodegenDrawTriangle("TriDraw8_" + std::to_string(i), (TriBlendMode)i, false, false);
@ -115,7 +107,7 @@ void LLVMDrawers::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
throw Exception("verifyFunction failed for CodegenDrawSpan()");
}
void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
@ -126,7 +118,7 @@ void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int
function.create_public();
DrawWallCodegen codegen;
codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1));
codegen.Generate(variant, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();
@ -134,7 +126,7 @@ void LLVMDrawers::CodegenDrawWall(const char *name, DrawWallVariant variant, int
throw Exception("verifyFunction failed for CodegenDrawWall()");
}
void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns)
void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
@ -145,7 +137,7 @@ void LLVMDrawers::CodegenDrawSky(const char *name, DrawSkyVariant variant, int c
function.create_public();
DrawSkyCodegen codegen;
codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1));
codegen.Generate(variant, function.parameter(0), function.parameter(1));
builder.CreateRetVoid();

View file

@ -49,8 +49,8 @@ public:
private:
void CodegenDrawColumn(const char *name, DrawColumnVariant variant);
void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns);
void CodegenDrawSky(const char *name, DrawSkyVariant variant, int columns);
void CodegenDrawWall(const char *name, DrawWallVariant variant);
void CodegenDrawSky(const char *name, DrawSkyVariant variant);
void CodegenDrawTriangle(const std::string &name, TriBlendMode blendmode, bool truecolor, bool colorfill);
llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context);