Changed column drawers to use true color textures

This commit is contained in:
Magnus Norddahl 2016-10-07 12:45:21 +02:00
parent 6512068005
commit aae4571c95
11 changed files with 215 additions and 176 deletions

View file

@ -89,6 +89,8 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
{
frac = stack_frac.load();
sample_index = frac >> FRACBITS;
if (!IsPaletteInput(variant))
sample_index = sample_index * 4;
}
else
{
@ -136,6 +138,33 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method,
}
}
// Tells the code generator whether the given column variant reads its source
// column as palette indices (true) or not (false).
//
// Loop() uses the answer to decide whether the sample index must be scaled by
// 4 before it is used to address the source column.
//
// Only the shaded and translated variants answer true; every other variant,
// including any value not named here, answers false.
bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant)
{
	switch (variant)
	{
	case DrawColumnVariant::DrawShaded:
	case DrawColumnVariant::DrawTranslated:
	case DrawColumnVariant::DrawTlatedAdd:
	case DrawColumnVariant::DrawAddClampTranslated:
	case DrawColumnVariant::DrawSubClampTranslated:
	case DrawColumnVariant::DrawRevSubClampTranslated:
		return true;

	default:
		// DrawCopy, Draw, DrawAdd, DrawAddClamp, DrawSubClamp, DrawRevSubClamp
		// and all Fill* variants end up here.
		break;
	}
	return false;
}
SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade)
{
SSAInt alpha, inv_alpha;
@ -143,29 +172,29 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor,
{
default:
case DrawColumnVariant::DrawCopy:
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub());
return blend_copy(Sample(sample_index));
case DrawColumnVariant::Draw:
return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade));
return blend_copy(Shade(Sample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp:
return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_add(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawShaded:
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
return blend_add(color, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::DrawSubClamp:
return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_sub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClamp:
return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_revsub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawTranslated:
return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade));
return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawSubClampTranslated:
return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClampTranslated:
return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::Fill:
return blend_copy(color);
case DrawColumnVariant::FillAdd:
@ -182,6 +211,57 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor,
}
}
// Emits the per-pixel code for a column drawer whose source texels are looked
// up through ColormapSample() or TranslateSample() and shaded with ShadePal().
//
// sample_index  - index of the texel within the source column
// bgcolor       - colour already present at the destination pixel
// variant       - which drawer is being generated; selects the blend below
// isSimpleShade - forwarded to ShadePal() to pick the shading routine
//
// Returns the colour to store at the destination. Unknown variants are
// handled like DrawCopy.
SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade)
{
	SSAInt fgWeight, bgWeight;

	switch (variant)
	{
	// ---- Textured, index taken through the colormap ----
	default:
	case DrawColumnVariant::DrawCopy:
		// Unshaded: fetch the colour straight from the base colour table
		// (index * 4 is the offset handed to load_vec4ub).
		return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub());

	case DrawColumnVariant::Draw:
	{
		SSAVec4i fg = ShadePal(ColormapSample(sample_index), isSimpleShade);
		return blend_copy(fg);
	}

	case DrawColumnVariant::DrawAdd:
	case DrawColumnVariant::DrawAddClamp:
	{
		SSAVec4i fg = ShadePal(ColormapSample(sample_index), isSimpleShade);
		return blend_add(fg, bgcolor, srcalpha, destalpha);
	}

	case DrawColumnVariant::DrawSubClamp:
	{
		SSAVec4i fg = ShadePal(ColormapSample(sample_index), isSimpleShade);
		return blend_sub(fg, bgcolor, srcalpha, destalpha);
	}

	case DrawColumnVariant::DrawRevSubClamp:
	{
		SSAVec4i fg = ShadePal(ColormapSample(sample_index), isSimpleShade);
		return blend_revsub(fg, bgcolor, srcalpha, destalpha);
	}

	// ---- Shaded: the sample acts as coverage for a constant colour ----
	case DrawColumnVariant::DrawShaded:
		// Clamp the sample to 0..64, then scale by 4 to get a 0..256 weight.
		fgWeight = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
		bgWeight = 256 - fgWeight;
		return blend_add(color, bgcolor, fgWeight, bgWeight);

	// ---- Textured, index taken through the translation table ----
	case DrawColumnVariant::DrawTranslated:
	{
		SSAVec4i fg = ShadePal(TranslateSample(sample_index), isSimpleShade);
		return blend_copy(fg);
	}

	case DrawColumnVariant::DrawTlatedAdd:
	case DrawColumnVariant::DrawAddClampTranslated:
	{
		SSAVec4i fg = ShadePal(TranslateSample(sample_index), isSimpleShade);
		return blend_add(fg, bgcolor, srcalpha, destalpha);
	}

	case DrawColumnVariant::DrawSubClampTranslated:
	{
		SSAVec4i fg = ShadePal(TranslateSample(sample_index), isSimpleShade);
		return blend_sub(fg, bgcolor, srcalpha, destalpha);
	}

	case DrawColumnVariant::DrawRevSubClampTranslated:
	{
		SSAVec4i fg = ShadePal(TranslateSample(sample_index), isSimpleShade);
		return blend_revsub(fg, bgcolor, srcalpha, destalpha);
	}

	// ---- Fills: no texture read at all ----
	case DrawColumnVariant::Fill:
		return blend_copy(color);

	case DrawColumnVariant::FillAdd:
		// Weight comes from component 3 of srccolor; adding (a >> 7) bumps
		// values of 128 and above by one, so an 8-bit 255 becomes 256.
		fgWeight = srccolor[3];
		fgWeight = fgWeight + (fgWeight >> 7);
		bgWeight = 256 - fgWeight;
		return blend_add(srccolor, bgcolor, fgWeight, bgWeight);

	case DrawColumnVariant::FillAddClamp:
		return blend_add(srccolor, bgcolor, srcalpha, destalpha);

	case DrawColumnVariant::FillSubClamp:
		return blend_sub(srccolor, bgcolor, srcalpha, destalpha);

	case DrawColumnVariant::FillRevSubClamp:
		return blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
	}
}
// Fetches the source texel at sample_index as a 4-component vector.
// sample_index is used as given; Loop() is where it gets multiplied by 4 for
// variants that are not palette input.
SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index)
{
	auto texel = source[sample_index];
	return texel.load_vec4ub();
}
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
{
return colormap[source[sample_index].load().zext_int()].load().zext_int();
@ -192,7 +272,15 @@ SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index)
return translation[source[sample_index].load().zext_int()].load().zext_int();
}
SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade)
// Applies lighting to an already-fetched colour.
//
// fg            - the colour to shade
// isSimpleShade - true selects shade_bgra_simple (light only); false selects
//                 shade_bgra_advanced, which also takes shade_constants
SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
	if (!isSimpleShade)
	{
		return shade_bgra_advanced(fg, light, shade_constants);
	}
	return shade_bgra_simple(fg, light);
}
SSAVec4i DrawColumnCodegen::ShadePal(SSAInt palIndex, bool isSimpleShade)
{
if (isSimpleShade)
return shade_pal_index_simple(palIndex, light, basecolors);

View file

@ -39,9 +39,13 @@ public:
private:
void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade);
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
SSAVec4i Sample(SSAInt frac);
SSAInt ColormapSample(SSAInt frac);
SSAInt TranslateSample(SSAInt frac);
SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade);
SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade);
SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade);
bool IsPaletteInput(DrawColumnVariant variant);
SSAStack<SSAInt> stack_index, stack_frac;

View file

@ -107,6 +107,11 @@ LLVMDrawersImpl::LLVMDrawersImpl()
CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4);
@ -114,6 +119,11 @@ LLVMDrawersImpl::LLVMDrawersImpl()
CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4);
CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque);
CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked);
CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent);
@ -159,6 +169,11 @@ LLVMDrawersImpl::LLVMDrawersImpl()
DrawColumnRt1AddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1AddClamp");
DrawColumnRt1SubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1SubClamp");
DrawColumnRt1RevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1RevSubClamp");
DrawColumnRt1Translated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1Translated");
DrawColumnRt1TlatedAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1TlatedAdd");
DrawColumnRt1AddClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1AddClampTranslated");
DrawColumnRt1SubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1SubClampTranslated");
DrawColumnRt1RevSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1RevSubClampTranslated");
DrawColumnRt4 = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4");
DrawColumnRt4Copy = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Copy");
DrawColumnRt4Add = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Add");
@ -166,6 +181,11 @@ LLVMDrawersImpl::LLVMDrawersImpl()
DrawColumnRt4AddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4AddClamp");
DrawColumnRt4SubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4SubClamp");
DrawColumnRt4RevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4RevSubClamp");
DrawColumnRt4Translated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Translated");
DrawColumnRt4TlatedAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4TlatedAdd");
DrawColumnRt4AddClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4AddClampTranslated");
DrawColumnRt4SubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4SubClampTranslated");
DrawColumnRt4RevSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4RevSubClampTranslated");
DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan");
DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked");
DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent");

View file

@ -144,6 +144,11 @@ public:
void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
@ -151,6 +156,11 @@ public:
void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;

View file

@ -191,6 +191,8 @@ FDynamicColormap ShadeFakeColormap[16];
BYTE identitymap[256];
FDynamicColormap identitycolormap;
bool drawer_needs_pal_input;
EXTERN_CVAR (Int, r_columnmethod)
void R_InitShadeMaps()
@ -2516,6 +2518,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
colfunc = transcolfunc;
hcolfunc_post1 = rt_tlate1col;
hcolfunc_post4 = rt_tlate4cols;
drawer_needs_pal_input = true;
}
return true;
}
@ -2566,6 +2569,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
colfunc = R_DrawTlatedAddColumn;
hcolfunc_post1 = rt_tlateadd1col;
hcolfunc_post4 = rt_tlateadd4cols;
drawer_needs_pal_input = true;
}
}
else
@ -2587,6 +2591,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
colfunc = R_DrawAddClampTranslatedColumn;
hcolfunc_post1 = rt_tlateaddclamp1col;
hcolfunc_post4 = rt_tlateaddclamp4cols;
drawer_needs_pal_input = true;
}
}
return true;
@ -2609,6 +2614,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
colfunc = R_DrawSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlatesubclamp1col;
hcolfunc_post4 = rt_tlatesubclamp4cols;
drawer_needs_pal_input = true;
}
return true;
@ -2634,6 +2640,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags)
colfunc = R_DrawRevSubClampTranslatedColumn;
hcolfunc_post1 = rt_tlaterevsubclamp1col;
hcolfunc_post4 = rt_tlaterevsubclamp4cols;
drawer_needs_pal_input = true;
}
return true;
@ -2658,6 +2665,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
{
fixed_t fglevel, bglevel;
drawer_needs_pal_input = false;
style.CheckFuzz();
if (style.BlendOp == STYLEOP_Shadow)
@ -2706,6 +2715,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation,
colfunc = R_DrawShadedColumn;
hcolfunc_post1 = rt_shaded1col;
hcolfunc_post4 = rt_shaded4cols;
drawer_needs_pal_input = true;
dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)];
basecolormap = &ShadeFakeColormap[16-alpha];
if (fixedlightlev >= 0 && fixedcolormap == NULL)

View file

@ -1121,6 +1121,7 @@ void R_FillColumnHorizP_C (void)
void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1;
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
{
@ -1189,7 +1190,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span)
endfrac -= dc_iscale;
}
}
dc_source = column + top;
dc_source = column + top * inputpixelsize;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
hcolfunc_pre ();

View file

@ -124,6 +124,11 @@ DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVM
DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand);
@ -131,129 +136,14 @@ DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVM
DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand);
/////////////////////////////////////////////////////////////////////////////
class RtTranslate1colRGBACommand : public DrawerCommand
{
const BYTE * RESTRICT translation;
int hx;
int yl;
int yh;
public:
RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh)
{
this->translation = translation;
this->hx = hx;
this->yl = yl;
this->yh = yh;
}
void Execute(DrawerThread *thread) override
{
int count = yh - yl + 1;
uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx];
// Things we do to hit the compiler's optimizer with a clue bat:
// 1. Parallelism is explicitly spelled out by using a separate
// C instruction for each assembly instruction. GCC lets me
// have four temporaries, but VC++ spills to the stack with
// more than two. Two is probably optimal, anyway.
// 2. The results of the translation lookups are explicitly
// stored in byte-sized variables. This causes the VC++ code
// to use byte mov instructions in most cases; for apparently
// random reasons, it will use movzx for some places. GCC
// ignores this and uses movzx always.
// Do 8 rows at a time.
for (int count8 = count >> 3; count8; --count8)
{
int c0, c1;
BYTE b0, b1;
c0 = source[0]; c1 = source[4];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[4] = b1;
c0 = source[8]; c1 = source[12];
b0 = translation[c0]; b1 = translation[c1];
source[8] = b0; source[12] = b1;
c0 = source[16]; c1 = source[20];
b0 = translation[c0]; b1 = translation[c1];
source[16] = b0; source[20] = b1;
c0 = source[24]; c1 = source[28];
b0 = translation[c0]; b1 = translation[c1];
source[24] = b0; source[28] = b1;
source += 32;
}
// Finish by doing 1 row at a time.
for (count &= 7; count; --count, source += 4)
{
source[0] = translation[source[0]];
}
}
};
class RtTranslate4colsRGBACommand : public DrawerCommand
{
const BYTE * RESTRICT translation;
int yl;
int yh;
public:
RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh)
{
this->translation = translation;
this->yl = yl;
this->yh = yh;
}
void Execute(DrawerThread *thread) override
{
int count = yh - yl + 1;
uint32_t *source = &thread->dc_temp_rgba[yl*4];
int c0, c1;
BYTE b0, b1;
// Do 2 rows at a time.
for (int count8 = count >> 1; count8; --count8)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
c0 = source[4]; c1 = source[5];
b0 = translation[c0]; b1 = translation[c1];
source[4] = b0; source[5] = b1;
c0 = source[6]; c1 = source[7];
b0 = translation[c0]; b1 = translation[c1];
source[6] = b0; source[7] = b1;
source += 8;
}
// Do the final row if count was odd.
if (count & 1)
{
c0 = source[0]; c1 = source[1];
b0 = translation[c0]; b1 = translation[c1];
source[0] = b0; source[1] = b1;
c0 = source[2]; c1 = source[3];
b0 = translation[c0]; b1 = translation[c1];
source[2] = b0; source[3] = b1;
}
}
};
class RtInitColsRGBACommand : public DrawerCommand
{
BYTE * RESTRICT buff;
@ -270,12 +160,13 @@ public:
}
};
template<typename InputPixelType>
class DrawColumnHorizRGBACommand : public DrawerCommand
{
int _count;
fixed_t _iscale;
fixed_t _texturefrac;
const BYTE * RESTRICT _source;
const InputPixelType * RESTRICT _source;
int _x;
int _yl;
int _yh;
@ -286,7 +177,7 @@ public:
_count = dc_count;
_iscale = dc_iscale;
_texturefrac = dc_texturefrac;
_source = dc_source;
_source = (const InputPixelType *)dc_source;
_x = dc_x;
_yl = dc_yl;
_yh = dc_yh;
@ -309,7 +200,7 @@ public:
fracstep = _iscale;
frac = _texturefrac;
const BYTE *source = _source;
const InputPixelType *source = _source;
if (count & 1) {
*dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep;
@ -419,28 +310,16 @@ void rt_map4cols_rgba (int sx, int yl, int yh)
DrawerCommandQueue::QueueCommand<DrawColumnRt4LLVMCommand>(0, sx, yl, yh);
}
void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate1colRGBACommand>(translation, hx, yl, yh);
}
void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh)
{
DrawerCommandQueue::QueueCommand<RtTranslate4colsRGBACommand>(translation, yl, yh);
}
// Translates one span at hx to the screen at sx.
void rt_tlate1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_map1col(hx, sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt1TranslatedLLVMCommand>(hx, sx, yl, yh);
}
// Translates all four spans to the screen starting at sx.
void rt_tlate4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_map4cols(sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt4TranslatedLLVMCommand>(0, sx, yl, yh);
}
// Adds one span at hx to the screen at sx without clamping.
@ -458,15 +337,13 @@ void rt_add4cols_rgba (int sx, int yl, int yh)
// Translates and adds one span at hx to the screen at sx without clamping.
void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_add1col(hx, sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx without clamping.
void rt_tlateadd4cols_rgba(int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_add4cols(sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
// Shades one span at hx to the screen at sx.
@ -496,15 +373,13 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh)
// Translates and adds one span at hx to the screen at sx with clamping.
void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_addclamp1col_rgba(hx, sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
// Translates and adds all four spans to the screen starting at sx with clamping.
void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_addclamp4cols(sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
// Subtracts one span at hx to the screen at sx with clamping.
@ -522,15 +397,13 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh)
// Translates and subtracts one span at hx to the screen at sx with clamping.
void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_subclamp1col_rgba(hx, sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt1SubClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
// Translates and subtracts all four spans to the screen starting at sx with clamping.
void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_subclamp4cols_rgba(sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt4SubClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
// Subtracts one span at hx from the screen at sx with clamping.
@ -548,15 +421,13 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh)
// Translates and subtracts one span at hx from the screen at sx with clamping.
void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{
rt_Translate1col_rgba(dc_translation, hx, yl, yh);
rt_revsubclamp1col_rgba(hx, sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt1RevSubClampTranslatedLLVMCommand>(hx, sx, yl, yh);
}
// Translates and subtracts all four spans from the screen starting at sx with clamping.
void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh)
{
rt_Translate4cols_rgba(dc_translation, yl, yh);
rt_revsubclamp4cols_rgba(sx, yl, yh);
DrawerCommandQueue::QueueCommand<DrawColumnRt4RevSubClampTranslatedLLVMCommand>(0, sx, yl, yh);
}
// Before each pass through a rendering loop that uses these routines,
@ -590,7 +461,10 @@ void R_DrawColumnHoriz_rgba (void)
(*span)[1] = dc_yh;
*span += 2;
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand>();
if (drawer_needs_pal_input)
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint8_t>>();
else
DrawerCommandQueue::QueueCommand<DrawColumnHorizRGBACommand<uint32_t>>();
}
// [RH] Just fills a column with a given color

View file

@ -126,7 +126,7 @@ extern void (*hcolfunc_pre) (void);
extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh);
extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh);
extern void (*hcolfunc_post4) (int sx, int yl, int yh);
extern bool drawer_needs_pal_input;
void R_InitTextureMapping ();

View file

@ -197,7 +197,11 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText
// draw the texture
const FTexture::Span *spans;
const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans);
const BYTE *pixels;
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)tex->GetColumnBgra(maskedtexturecol[dc_x] >> FRACBITS, &spans);
else
pixels = tex->GetColumn(maskedtexturecol[dc_x] >> FRACBITS, &spans);
blastfunc (pixels, spans);
rw_light += rw_lightstep;
spryscale += rw_scalestep;

View file

@ -253,6 +253,7 @@ bool sprflipvert;
void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
{
int pixelsize = r_swtruecolor ? 4 : 1;
int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1;
const fixed_t centeryfrac = FLOAT2FIXED(CenterY);
const fixed_t texturemid = FLOAT2FIXED(dc_texturemid);
while (span->Length != 0)
@ -322,7 +323,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span)
endfrac -= dc_iscale;
}
}
dc_source = column + top;
dc_source = column + top * inputpixelsize;
dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg;
dc_count = dc_yh - dc_yl + 1;
colfunc ();
@ -469,7 +470,11 @@ void R_DrawVisSprite (vissprite_t *vis)
{
while ((dc_x < stop4) && (dc_x & 3))
{
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans);
else
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (ispsprite || !R_ClipSpriteColumnWithPortals(vis))
R_DrawMaskedColumn (pixels, spans);
dc_x++;
@ -481,7 +486,11 @@ void R_DrawVisSprite (vissprite_t *vis)
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans);
else
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (ispsprite || !R_ClipSpriteColumnWithPortals(vis))
R_DrawMaskedColumnHoriz (pixels, spans);
dc_x++;
@ -492,7 +501,11 @@ void R_DrawVisSprite (vissprite_t *vis)
while (dc_x < x2)
{
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans);
else
pixels = tex->GetColumn (frac >> FRACBITS, &spans);
if (ispsprite || !R_ClipSpriteColumnWithPortals(vis))
R_DrawMaskedColumn (pixels, spans);
dc_x++;
@ -650,7 +663,10 @@ void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Sp
const BYTE *column;
const FTexture::Span *spans;
column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans);
if (r_swtruecolor && !drawer_needs_pal_input)
column = (const BYTE *)WallSpriteTile->GetColumnBgra (lwall[dc_x] >> FRACBITS, &spans);
else
column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans);
dc_texturefrac = 0;
drawfunc (column, spans);
rw_light += rw_lightstep;

View file

@ -305,7 +305,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
{
while ((dc_x < stop4) && (dc_x & 3))
{
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr);
else
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
R_DrawMaskedColumn(pixels, spans);
dc_x++;
frac += xiscale_i;
@ -316,7 +320,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
rt_initcols(nullptr);
for (int zz = 4; zz; --zz)
{
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr);
else
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
R_DrawMaskedColumnHoriz(pixels, spans);
dc_x++;
frac += xiscale_i;
@ -326,7 +334,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms)
while (dc_x < x2_i)
{
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
if (r_swtruecolor && !drawer_needs_pal_input)
pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr);
else
pixels = img->GetColumn(frac >> FRACBITS, spanptr);
R_DrawMaskedColumn(pixels, spans);
dc_x++;
frac += xiscale_i;