diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 601358274..e920fb73a 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -89,6 +89,8 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, { frac = stack_frac.load(); sample_index = frac >> FRACBITS; + if (!IsPaletteInput(variant)) + sample_index = sample_index * 4; } else { @@ -136,6 +138,33 @@ void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, } } +bool DrawColumnCodegen::IsPaletteInput(DrawColumnVariant variant) +{ + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + case DrawColumnVariant::Draw: + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + case DrawColumnVariant::DrawSubClamp: + case DrawColumnVariant::DrawRevSubClamp: + case DrawColumnVariant::Fill: + case DrawColumnVariant::FillAdd: + case DrawColumnVariant::FillAddClamp: + case DrawColumnVariant::FillSubClamp: + case DrawColumnVariant::FillRevSubClamp: + return false; + case DrawColumnVariant::DrawShaded: + case DrawColumnVariant::DrawTranslated: + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + case DrawColumnVariant::DrawSubClampTranslated: + case DrawColumnVariant::DrawRevSubClampTranslated: + return true; + } +} + SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) { SSAInt alpha, inv_alpha; @@ -143,29 +172,29 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, { default: case DrawColumnVariant::DrawCopy: - return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + return blend_copy(Sample(sample_index)); case DrawColumnVariant::Draw: - return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade)); + return blend_copy(Shade(Sample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawAdd: case DrawColumnVariant::DrawAddClamp: - return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawShaded: alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; inv_alpha = 256 - alpha; return blend_add(color, bgcolor, alpha, inv_alpha); case DrawColumnVariant::DrawSubClamp: - return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_sub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClamp: - return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(Shade(Sample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawTranslated: - return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); + return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); case DrawColumnVariant::DrawTlatedAdd: case DrawColumnVariant::DrawAddClampTranslated: - return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawSubClampTranslated: - return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::DrawRevSubClampTranslated: - return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); case DrawColumnVariant::Fill: return blend_copy(color); case DrawColumnVariant::FillAdd: @@ -182,6 +211,57 @@ SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, } } +SSAVec4i DrawColumnCodegen::ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +{ + SSAInt alpha, inv_alpha; + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + case DrawColumnVariant::Draw: + return blend_copy(ShadePal(ColormapSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + return blend_add(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + return blend_add(color, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::DrawSubClamp: + return blend_sub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClamp: + return blend_revsub(ShadePal(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawTranslated: + return blend_copy(ShadePal(TranslateSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + return blend_add(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawSubClampTranslated: + return blend_sub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClampTranslated: + return blend_revsub(ShadePal(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::Fill: + return blend_copy(color); + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + return blend_add(srccolor, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::FillAddClamp: + return blend_add(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillSubClamp: + return blend_sub(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillRevSubClamp: + return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + } +} + +SSAVec4i DrawColumnCodegen::Sample(SSAInt sample_index) +{ + return source[sample_index].load_vec4ub(); +} + SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) { return colormap[source[sample_index].load().zext_int()].load().zext_int(); @@ -192,7 +272,15 @@ SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) return translation[source[sample_index].load().zext_int()].load().zext_int(); } -SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade) +SSAVec4i DrawColumnCodegen::Shade(SSAVec4i fg, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_bgra_simple(fg, light); + else + return shade_bgra_advanced(fg, light, shade_constants); +} + +SSAVec4i DrawColumnCodegen::ShadePal(SSAInt palIndex, bool isSimpleShade) { if (isSimpleShade) return shade_pal_index_simple(palIndex, light, basecolors); diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 675a5ea67..ffba50a15 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -39,9 +39,13 @@ public: private: void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + SSAVec4i ProcessPixelPal(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); + SSAVec4i Sample(SSAInt frac); SSAInt ColormapSample(SSAInt frac); SSAInt TranslateSample(SSAInt frac); - SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); + SSAVec4i Shade(SSAVec4i fgcolor, bool isSimpleShade); + SSAVec4i ShadePal(SSAInt palIndex, bool isSimpleShade); + bool IsPaletteInput(DrawColumnVariant variant); SSAStack stack_index, stack_frac; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 3108b8c6a..ac0633058 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -107,6 +107,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt1); CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); @@ -114,6 +119,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Translated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4TlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Rt4); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -159,6 +169,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawColumnRt1AddClamp = mProgram.GetProcAddress("DrawColumnRt1AddClamp"); DrawColumnRt1SubClamp = mProgram.GetProcAddress("DrawColumnRt1SubClamp"); DrawColumnRt1RevSubClamp = mProgram.GetProcAddress("DrawColumnRt1RevSubClamp"); + DrawColumnRt1Translated = mProgram.GetProcAddress("DrawColumnRt1Translated"); + DrawColumnRt1TlatedAdd = mProgram.GetProcAddress("DrawColumnRt1TlatedAdd"); + DrawColumnRt1AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt1AddClampTranslated"); + DrawColumnRt1SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1SubClampTranslated"); + DrawColumnRt1RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt1RevSubClampTranslated"); DrawColumnRt4 = mProgram.GetProcAddress("DrawColumnRt4"); DrawColumnRt4Copy = mProgram.GetProcAddress("DrawColumnRt4Copy"); DrawColumnRt4Add = mProgram.GetProcAddress("DrawColumnRt4Add"); @@ -166,6 +181,11 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawColumnRt4AddClamp = mProgram.GetProcAddress("DrawColumnRt4AddClamp"); DrawColumnRt4SubClamp = mProgram.GetProcAddress("DrawColumnRt4SubClamp"); DrawColumnRt4RevSubClamp = mProgram.GetProcAddress("DrawColumnRt4RevSubClamp"); + DrawColumnRt4Translated = mProgram.GetProcAddress("DrawColumnRt4Translated"); + DrawColumnRt4TlatedAdd = mProgram.GetProcAddress("DrawColumnRt4TlatedAdd"); + DrawColumnRt4AddClampTranslated = mProgram.GetProcAddress("DrawColumnRt4AddClampTranslated"); + DrawColumnRt4SubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4SubClampTranslated"); + DrawColumnRt4RevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRt4RevSubClampTranslated"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 549825e4f..60a6c799a 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -144,6 +144,11 @@ public: void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; @@ -151,6 +156,11 @@ public: void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Translated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4TlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4AddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4SubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4RevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_draw.cpp b/src/r_draw.cpp index f255352f5..11c7020b4 100644 --- a/src/r_draw.cpp +++ b/src/r_draw.cpp @@ -191,6 +191,8 @@ FDynamicColormap ShadeFakeColormap[16]; BYTE identitymap[256]; FDynamicColormap identitycolormap; +bool drawer_needs_pal_input; + EXTERN_CVAR (Int, r_columnmethod) void R_InitShadeMaps() @@ -2516,6 +2518,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = transcolfunc; hcolfunc_post1 = rt_tlate1col; hcolfunc_post4 = rt_tlate4cols; + drawer_needs_pal_input = true; } return true; } @@ -2566,6 +2569,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawTlatedAddColumn; hcolfunc_post1 = rt_tlateadd1col; hcolfunc_post4 = rt_tlateadd4cols; + drawer_needs_pal_input = true; } } else @@ -2587,6 +2591,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawAddClampTranslatedColumn; hcolfunc_post1 = rt_tlateaddclamp1col; hcolfunc_post4 = rt_tlateaddclamp4cols; + drawer_needs_pal_input = true; } } return true; @@ -2609,6 +2614,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawSubClampTranslatedColumn; hcolfunc_post1 = rt_tlatesubclamp1col; hcolfunc_post4 = rt_tlatesubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -2634,6 +2640,7 @@ static bool R_SetBlendFunc (int op, fixed_t fglevel, fixed_t bglevel, int flags) colfunc = R_DrawRevSubClampTranslatedColumn; hcolfunc_post1 = rt_tlaterevsubclamp1col; hcolfunc_post4 = rt_tlaterevsubclamp4cols; + drawer_needs_pal_input = true; } return true; @@ -2658,6 +2665,8 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, { fixed_t fglevel, bglevel; + drawer_needs_pal_input = false; + style.CheckFuzz(); if (style.BlendOp == STYLEOP_Shadow) @@ -2706,6 +2715,7 @@ ESPSResult R_SetPatchStyle (FRenderStyle style, fixed_t alpha, int translation, colfunc = R_DrawShadedColumn; hcolfunc_post1 = rt_shaded1col; hcolfunc_post4 = rt_shaded4cols; + drawer_needs_pal_input = true; dc_color = fixedcolormap ? fixedcolormap->Maps[APART(color)] : basecolormap->Maps[APART(color)]; basecolormap = &ShadeFakeColormap[16-alpha]; if (fixedlightlev >= 0 && fixedcolormap == NULL) diff --git a/src/r_drawt.cpp b/src/r_drawt.cpp index 837093044..43354bfd5 100644 --- a/src/r_drawt.cpp +++ b/src/r_drawt.cpp @@ -1121,6 +1121,7 @@ void R_FillColumnHorizP_C (void) void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) { int pixelsize = r_swtruecolor ? 4 : 1; + int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1; const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) { @@ -1189,7 +1190,7 @@ void R_DrawMaskedColumnHoriz (const BYTE *column, const FTexture::Span *span) endfrac -= dc_iscale; } } - dc_source = column + top; + dc_source = column + top * inputpixelsize; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; hcolfunc_pre (); diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 98d8c2242..18ae228e4 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -124,6 +124,11 @@ DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVM DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Translated, DrawColumnRt1Translated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1TlatedAdd, DrawColumnRt1TlatedAdd, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1AddClampTranslated, DrawColumnRt1AddClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1SubClampTranslated, DrawColumnRt1SubClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClampTranslated, DrawColumnRt1RevSubClampTranslated, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); @@ -131,129 +136,14 @@ DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVM DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Translated, DrawColumnRt4Translated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4TlatedAdd, DrawColumnRt4TlatedAdd, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4AddClampTranslated, DrawColumnRt4AddClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4SubClampTranslated, DrawColumnRt4SubClampTranslated, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClampTranslated, DrawColumnRt4RevSubClampTranslated, DrawColumnRt1LLVMCommand); ///////////////////////////////////////////////////////////////////////////// -class RtTranslate1colRGBACommand : public DrawerCommand -{ - const BYTE * RESTRICT translation; - int hx; - int yl; - int yh; - -public: - RtTranslate1colRGBACommand(const BYTE *translation, int hx, int yl, int yh) - { - this->translation = translation; - this->hx = hx; - this->yl = yl; - this->yh = yh; - } - - void Execute(DrawerThread *thread) override - { - int count = yh - yl + 1; - uint32_t *source = &thread->dc_temp_rgba[yl*4 + hx]; - - // Things we do to hit the compiler's optimizer with a clue bat: - // 1. Parallelism is explicitly spelled out by using a separate - // C instruction for each assembly instruction. GCC lets me - // have four temporaries, but VC++ spills to the stack with - // more than two. Two is probably optimal, anyway. - // 2. The results of the translation lookups are explicitly - // stored in byte-sized variables. This causes the VC++ code - // to use byte mov instructions in most cases; for apparently - // random reasons, it will use movzx for some places. GCC - // ignores this and uses movzx always. - - // Do 8 rows at a time. - for (int count8 = count >> 3; count8; --count8) - { - int c0, c1; - BYTE b0, b1; - - c0 = source[0]; c1 = source[4]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[4] = b1; - - c0 = source[8]; c1 = source[12]; - b0 = translation[c0]; b1 = translation[c1]; - source[8] = b0; source[12] = b1; - - c0 = source[16]; c1 = source[20]; - b0 = translation[c0]; b1 = translation[c1]; - source[16] = b0; source[20] = b1; - - c0 = source[24]; c1 = source[28]; - b0 = translation[c0]; b1 = translation[c1]; - source[24] = b0; source[28] = b1; - - source += 32; - } - // Finish by doing 1 row at a time. - for (count &= 7; count; --count, source += 4) - { - source[0] = translation[source[0]]; - } - } -}; - -class RtTranslate4colsRGBACommand : public DrawerCommand -{ - const BYTE * RESTRICT translation; - int yl; - int yh; - -public: - RtTranslate4colsRGBACommand(const BYTE *translation, int yl, int yh) - { - this->translation = translation; - this->yl = yl; - this->yh = yh; - } - - void Execute(DrawerThread *thread) override - { - int count = yh - yl + 1; - uint32_t *source = &thread->dc_temp_rgba[yl*4]; - int c0, c1; - BYTE b0, b1; - - // Do 2 rows at a time. - for (int count8 = count >> 1; count8; --count8) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - - c0 = source[4]; c1 = source[5]; - b0 = translation[c0]; b1 = translation[c1]; - source[4] = b0; source[5] = b1; - - c0 = source[6]; c1 = source[7]; - b0 = translation[c0]; b1 = translation[c1]; - source[6] = b0; source[7] = b1; - - source += 8; - } - // Do the final row if count was odd. - if (count & 1) - { - c0 = source[0]; c1 = source[1]; - b0 = translation[c0]; b1 = translation[c1]; - source[0] = b0; source[1] = b1; - - c0 = source[2]; c1 = source[3]; - b0 = translation[c0]; b1 = translation[c1]; - source[2] = b0; source[3] = b1; - } - } -}; - class RtInitColsRGBACommand : public DrawerCommand { BYTE * RESTRICT buff; @@ -270,12 +160,13 @@ public: } }; +template class DrawColumnHorizRGBACommand : public DrawerCommand { int _count; fixed_t _iscale; fixed_t _texturefrac; - const BYTE * RESTRICT _source; + const InputPixelType * RESTRICT _source; int _x; int _yl; int _yh; @@ -286,7 +177,7 @@ public: _count = dc_count; _iscale = dc_iscale; _texturefrac = dc_texturefrac; - _source = dc_source; + _source = (const InputPixelType *)dc_source; _x = dc_x; _yl = dc_yl; _yh = dc_yh; @@ -309,7 +200,7 @@ public: fracstep = _iscale; frac = _texturefrac; - const BYTE *source = _source; + const InputPixelType *source = _source; if (count & 1) { *dest = source[frac >> FRACBITS]; dest += 4; frac += fracstep; @@ -419,28 +310,16 @@ void rt_map4cols_rgba (int sx, int yl, int yh) DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } -void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(translation, hx, yl, yh); -} - -void rt_Translate4cols_rgba(const BYTE *translation, int yl, int yh) -{ - DrawerCommandQueue::QueueCommand(translation, yl, yh); -} - // Translates one span at hx to the screen at sx. void rt_tlate1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_map1col(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates all four spans to the screen starting at sx. void rt_tlate4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_map4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx without clamping. @@ -458,15 +337,13 @@ void rt_add4cols_rgba (int sx, int yl, int yh) // Translates and adds one span at hx to the screen at sx without clamping. void rt_tlateadd1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_add1col(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx without clamping. void rt_tlateadd4cols_rgba(int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_add4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Shades one span at hx to the screen at sx. @@ -496,15 +373,13 @@ void rt_addclamp4cols_rgba (int sx, int yl, int yh) // Translates and adds one span at hx to the screen at sx with clamping. void rt_tlateaddclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_addclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and adds all four spans to the screen starting at sx with clamping. void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_addclamp4cols(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx to the screen at sx with clamping. @@ -522,15 +397,13 @@ void rt_subclamp4cols_rgba (int sx, int yl, int yh) // Translates and subtracts one span at hx to the screen at sx with clamping. void rt_tlatesubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_subclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans to the screen starting at sx with clamping. void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_subclamp4cols_rgba(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Subtracts one span at hx from the screen at sx with clamping. @@ -548,15 +421,13 @@ void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) // Translates and subtracts one span at hx from the screen at sx with clamping. void rt_tlaterevsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - rt_Translate1col_rgba(dc_translation, hx, yl, yh); - rt_revsubclamp1col_rgba(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Translates and subtracts all four spans from the screen starting at sx with clamping. void rt_tlaterevsubclamp4cols_rgba (int sx, int yl, int yh) { - rt_Translate4cols_rgba(dc_translation, yl, yh); - rt_revsubclamp4cols_rgba(sx, yl, yh); + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Before each pass through a rendering loop that uses these routines, @@ -590,7 +461,10 @@ void R_DrawColumnHoriz_rgba (void) (*span)[1] = dc_yh; *span += 2; - DrawerCommandQueue::QueueCommand(); + if (drawer_needs_pal_input) + DrawerCommandQueue::QueueCommand>(); + else + DrawerCommandQueue::QueueCommand>(); } // [RH] Just fills a column with a given color diff --git a/src/r_main.h b/src/r_main.h index 8d1867526..6a802e799 100644 --- a/src/r_main.h +++ b/src/r_main.h @@ -126,7 +126,7 @@ extern void (*hcolfunc_pre) (void); extern void (*hcolfunc_post1) (int hx, int sx, int yl, int yh); extern void (*hcolfunc_post2) (int hx, int sx, int yl, int yh); extern void (*hcolfunc_post4) (int sx, int yl, int yh); - +extern bool drawer_needs_pal_input; void R_InitTextureMapping (); diff --git a/src/r_segs.cpp b/src/r_segs.cpp index 078f1d921..92e6a447a 100644 --- a/src/r_segs.cpp +++ b/src/r_segs.cpp @@ -197,7 +197,11 @@ static void BlastMaskedColumn (void (*blastfunc)(const BYTE *pixels, const FText // draw the texture const FTexture::Span *spans; - const BYTE *pixels = tex->GetColumn (maskedtexturecol[dc_x] >> FRACBITS, &spans); + const BYTE *pixels; + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra(maskedtexturecol[dc_x] >> FRACBITS, &spans); + else + pixels = tex->GetColumn(maskedtexturecol[dc_x] >> FRACBITS, &spans); blastfunc (pixels, spans); rw_light += rw_lightstep; spryscale += rw_scalestep; diff --git a/src/r_things.cpp b/src/r_things.cpp index 639ed725e..8e306e04f 100644 --- a/src/r_things.cpp +++ b/src/r_things.cpp @@ -253,6 +253,7 @@ bool sprflipvert; void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) { int pixelsize = r_swtruecolor ? 4 : 1; + int inputpixelsize = (r_swtruecolor && !drawer_needs_pal_input) ? 4 : 1; const fixed_t centeryfrac = FLOAT2FIXED(CenterY); const fixed_t texturemid = FLOAT2FIXED(dc_texturemid); while (span->Length != 0) @@ -322,7 +323,7 @@ void R_DrawMaskedColumn (const BYTE *column, const FTexture::Span *span) endfrac -= dc_iscale; } } - dc_source = column + top; + dc_source = column + top * inputpixelsize; dc_dest = (ylookup[dc_yl] + dc_x) * pixelsize + dc_destorg; dc_count = dc_yh - dc_yl + 1; colfunc (); @@ -469,7 +470,11 @@ void R_DrawVisSprite (vissprite_t *vis) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumn (pixels, spans); dc_x++; @@ -481,7 +486,11 @@ void R_DrawVisSprite (vissprite_t *vis) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumnHoriz (pixels, spans); dc_x++; @@ -492,7 +501,11 @@ void R_DrawVisSprite (vissprite_t *vis) while (dc_x < x2) { - pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)tex->GetColumnBgra (frac >> FRACBITS, &spans); + else + pixels = tex->GetColumn (frac >> FRACBITS, &spans); + if (ispsprite || !R_ClipSpriteColumnWithPortals(vis)) R_DrawMaskedColumn (pixels, spans); dc_x++; @@ -650,7 +663,10 @@ void R_WallSpriteColumn (void (*drawfunc)(const BYTE *column, const FTexture::Sp const BYTE *column; const FTexture::Span *spans; - column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); + if (r_swtruecolor && !drawer_needs_pal_input) + column = (const BYTE *)WallSpriteTile->GetColumnBgra (lwall[dc_x] >> FRACBITS, &spans); + else + column = WallSpriteTile->GetColumn (lwall[dc_x] >> FRACBITS, &spans); dc_texturefrac = 0; drawfunc (column, spans); rw_light += rw_lightstep; diff --git a/src/v_draw.cpp b/src/v_draw.cpp index 37ced09d5..1c23523d6 100644 --- a/src/v_draw.cpp +++ b/src/v_draw.cpp @@ -305,7 +305,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) { while ((dc_x < stop4) && (dc_x & 3)) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumn(pixels, spans); dc_x++; frac += xiscale_i; @@ -316,7 +320,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) rt_initcols(nullptr); for (int zz = 4; zz; --zz) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumnHoriz(pixels, spans); dc_x++; frac += xiscale_i; @@ -326,7 +334,11 @@ void DCanvas::DrawTextureParms(FTexture *img, DrawParms &parms) while (dc_x < x2_i) { - pixels = img->GetColumn(frac >> FRACBITS, spanptr); + if (r_swtruecolor && !drawer_needs_pal_input) + pixels = (const BYTE *)img->GetColumnBgra(frac >> FRACBITS, spanptr); + else + pixels = img->GetColumn(frac >> FRACBITS, spanptr); + R_DrawMaskedColumn(pixels, spans); dc_x++; frac += xiscale_i;