From bd9ec843dd8d8e9af66179b3a11de68f7e26bdad Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Sun, 20 Nov 2016 16:42:53 +0100
Subject: [PATCH] Add palette version of the blend modes

---
Review notes (this text sits between the "---" separator and the diff, so
it is not part of the commit message):

 * ProcessPixel8: the TranslateAdd, TranslateSub and TranslateRevSub cases
   now sample through TranslateSample8(), mirroring the TranslateSample32()
   calls in the same cases of ProcessPixel32. As posted they called
   Sample8(), which skips the translation table.
 * Only the content of added lines changed; hunk line counts and the
   diffstat are unchanged. The blob id on the "index" line of
   drawtrianglecodegen.cpp still names the originally posted post-image.
 * In ProcessPixel32 / TranslateAlphaBlend the removed and added
   blend_alpha_blend lines look identical here; presumably they differed
   only in whitespace. The pair is kept so the hunk counts stay correct.
 * Line breaks and indentation were restored by hand (tabs assumed);
   verify against the tree before applying.

 .../fixedfunction/drawtrianglecodegen.cpp     | 153 ++++++++++++++----
 .../fixedfunction/drawtrianglecodegen.h       |  13 +-
 src/r_compiler/llvmdrawers.cpp                |   6 +-
 src/r_compiler/llvmdrawers.h                  |   4 +-
 src/r_poly_particle.cpp                       |  14 +-
 src/r_poly_triangle.cpp                       |   4 +-
 6 files changed, 142 insertions(+), 52 deletions(-)

diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp
index c59992d3b..da6933149 100644
--- a/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp
+++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.cpp
@@ -505,7 +505,7 @@ void DrawTriangleCodegen::LoopPartialBlock()
 	loopy.end_block();
 }
 
-SSAVec4i DrawTriangleCodegen::TranslateSample(SSAInt uvoffset)
+SSAVec4i DrawTriangleCodegen::TranslateSample32(SSAInt uvoffset)
 {
 	if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
 		return translation[color * 4].load_vec4ub(true);
@@ -513,7 +513,15 @@ SSAVec4i DrawTriangleCodegen::TranslateSample(SSAInt uvoffset)
 		return translation[texturePixels[uvoffset].load(true).zext_int() * 4].load_vec4ub(true);
 }
 
-SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset)
+SSAInt DrawTriangleCodegen::TranslateSample8(SSAInt uvoffset)
+{
+	if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
+		return translation[color].load(true).zext_int();
+	else
+		return translation[texturePixels[uvoffset].load(true).zext_int()].load(true).zext_int();
+}
+
+SSAVec4i DrawTriangleCodegen::Sample32(SSAInt uvoffset)
 {
 	if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
 		return SSAVec4i::unpack(color);
@@ -521,6 +529,19 @@ SSAVec4i DrawTriangleCodegen::Sample(SSAInt uvoffset)
 		return texturePixels[uvoffset * 4].load_vec4ub(true);
 }
 
+SSAInt DrawTriangleCodegen::Sample8(SSAInt uvoffset)
+{
+	if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
+		return color;
+	else
+		return texturePixels[uvoffset].load(true).zext_int();
+}
+
+SSAInt DrawTriangleCodegen::Shade8(SSAInt c)
+{
+	return currentcolormap[c].load(true).zext_int();
+}
+
 SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying)
 {
 	SSAInt ufrac = varying[0];
@@ -538,48 +559,54 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying)
 	{
 	default:
 	case TriBlendMode::Copy:
-		fg = Sample(uvoffset);
-		output = blend_copy(shade_bgra_simple(fg, currentlight)); break;
+		fg = Sample32(uvoffset);
+		output = blend_copy(shade_bgra_simple(fg, currentlight));
+		break;
 	case TriBlendMode::AlphaBlend:
-		fg = Sample(uvoffset);
-		output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg); break;
+		fg = Sample32(uvoffset);
+		output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg);
+		break;
 	case TriBlendMode::AddSolid:
-		fg = Sample(uvoffset);
-		output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha); break;
+		fg = Sample32(uvoffset);
+		output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, destalpha);
+		break;
 	case TriBlendMode::Add:
-		fg = Sample(uvoffset);
-		output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
+		fg = Sample32(uvoffset);
+		output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
+		break;
 	case TriBlendMode::Sub:
-		fg = Sample(uvoffset);
-		output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
+		fg = Sample32(uvoffset);
+		output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
+		break;
 	case TriBlendMode::RevSub:
-		fg = Sample(uvoffset);
-		output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha)); break;
+		fg = Sample32(uvoffset);
+		output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
+		break;
 	case TriBlendMode::Shaded:
-		fg = Sample(uvoffset);
+		fg = Sample32(uvoffset);
 		alpha = fg[0];
 		alpha = alpha + (alpha >> 7); // 255 -> 256
 		inv_alpha = 256 - alpha;
 		output = blend_add(shade_bgra_simple(SSAVec4i::unpack(color), currentlight), bg, alpha, inv_alpha);
 		break;
 	case TriBlendMode::TranslateCopy:
-		fg = TranslateSample(uvoffset);
+		fg = TranslateSample32(uvoffset);
 		output = blend_copy(shade_bgra_simple(fg, currentlight));
 		break;
 	case TriBlendMode::TranslateAlphaBlend:
-		fg = TranslateSample(uvoffset);
-		output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg);
+		fg = TranslateSample32(uvoffset);
+		output = blend_alpha_blend(shade_bgra_simple(fg, currentlight), bg);
 		break;
 	case TriBlendMode::TranslateAdd:
-		fg = TranslateSample(uvoffset);
+		fg = TranslateSample32(uvoffset);
 		output = blend_add(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
 		break;
 	case TriBlendMode::TranslateSub:
-		fg = TranslateSample(uvoffset);
+		fg = TranslateSample32(uvoffset);
 		output = blend_sub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
 		break;
 	case TriBlendMode::TranslateRevSub:
-		fg = TranslateSample(uvoffset);
+		fg = TranslateSample32(uvoffset);
 		output = blend_revsub(shade_bgra_simple(fg, currentlight), bg, srcalpha, calc_blend_bgalpha(fg, destalpha));
 		break;
 	}
@@ -587,6 +614,18 @@ SSAVec4i DrawTriangleCodegen::ProcessPixel32(SSAVec4i bg, SSAInt *varying)
 	return output;
 }
 
+SSAVec4i DrawTriangleCodegen::ToBgra(SSAInt index)
+{
+	SSAVec4i c = BaseColors[index * 4].load_vec4ub(true);
+	c = c.insert(3, 255); // element 3 is forced to 255 regardless of the table entry
+	return c;
+}
+
+SSAInt DrawTriangleCodegen::ToPal8(SSAVec4i c)
+{
+	return RGB32k[((c[2] >> 3) * 32 + (c[1] >> 3)) * 32 + (c[0] >> 3)].load(true).zext_int();
+}
+
 SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying)
 {
 	SSAInt ufrac = varying[0];
@@ -596,19 +635,65 @@ SSAInt DrawTriangleCodegen::ProcessPixel8(SSAInt bg, SSAInt *varying)
 	SSAInt vpos = ((vfrac >> 16) * textureHeight) >> 16;
 	SSAInt uvoffset = upos * textureHeight + vpos;
 
-	if (variant == TriDrawVariant::FillNormal || variant == TriDrawVariant::FillSubsector || variant == TriDrawVariant::FuzzSubsector)
+	SSAVec4i fg;
+	SSAInt alpha, inv_alpha;
+	SSAInt output;
+	SSAInt palindex;
+
+	switch (blendmode)
 	{
-		return currentcolormap[color].load(true).zext_int();
-	}
-	else
-	{
-		SSAInt index = texturePixels[uvoffset].load(true).zext_int();
-		SSAInt fg = currentcolormap[index].load(true).zext_int();
-		if (blendmode != TriBlendMode::AlphaBlend)
-			return fg;
-		else
-			return (index == SSAInt(0)).select(bg, fg);
+	default:
+	case TriBlendMode::Copy:
+		output = Shade8(Sample8(uvoffset));
+		break;
+	case TriBlendMode::AlphaBlend:
+		palindex = Sample8(uvoffset);
+		output = (palindex == SSAInt(0)).select(bg, Shade8(palindex)); // sampled index 0 keeps the background
+		break;
+	case TriBlendMode::AddSolid:
+		fg = ToBgra(Shade8(Sample8(uvoffset)));
+		output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, destalpha));
+		break;
+	case TriBlendMode::Add:
+		fg = ToBgra(Shade8(Sample8(uvoffset)));
+		output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
+	case TriBlendMode::Sub:
+		fg = ToBgra(Shade8(Sample8(uvoffset)));
+		output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
+	case TriBlendMode::RevSub:
+		fg = ToBgra(Shade8(Sample8(uvoffset)));
+		output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
+	case TriBlendMode::Shaded:
+		alpha = Sample8(uvoffset); // the sampled value itself is used as the blend weight
+		alpha = alpha + (alpha >> 7); // 255 -> 256
+		inv_alpha = 256 - alpha;
+		output = ToPal8(blend_add(ToBgra(Shade8(color)), ToBgra(bg), alpha, inv_alpha));
+		break;
+	case TriBlendMode::TranslateCopy:
+		output = Shade8(TranslateSample8(uvoffset));
+		break;
+	case TriBlendMode::TranslateAlphaBlend:
+		palindex = TranslateSample8(uvoffset);
+		output = (palindex == SSAInt(0)).select(bg, Shade8(palindex));
+		break;
+	case TriBlendMode::TranslateAdd:
+		fg = ToBgra(Shade8(TranslateSample8(uvoffset))); // translated index, as in ProcessPixel32
+		output = ToPal8(blend_add(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
+	case TriBlendMode::TranslateSub:
+		fg = ToBgra(Shade8(TranslateSample8(uvoffset))); // translated index, as in ProcessPixel32
+		output = ToPal8(blend_sub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
+	case TriBlendMode::TranslateRevSub:
+		fg = ToBgra(Shade8(TranslateSample8(uvoffset))); // translated index, as in ProcessPixel32
+		output = ToPal8(blend_revsub(fg, ToBgra(bg), srcalpha, calc_blend_bgalpha(fg, destalpha)));
+		break;
 	}
+
+	return output;
 }
 
 void DrawTriangleCodegen::SetStencilBlock(SSAInt block)
@@ -685,9 +770,7 @@ void DrawTriangleCodegen::LoadArgs(SSAValue args, SSAValue thread_data)
 	{
 		Colormaps = args[0][20].load(true);
 		RGB32k = args[0][21].load(true);
-		Col2RGB8 = args[0][22].load(true);
-		Col2RGB8_LessPrecision = args[0][23].load(true);
-		Col2RGB8_Inverse = args[0][24].load(true);
+		BaseColors = args[0][22].load(true);
 	}
 
 	thread.core = thread_data[0][0].load(true);
diff --git a/src/r_compiler/fixedfunction/drawtrianglecodegen.h b/src/r_compiler/fixedfunction/drawtrianglecodegen.h
index d2ff95e02..74358f99a 100644
--- a/src/r_compiler/fixedfunction/drawtrianglecodegen.h
+++ b/src/r_compiler/fixedfunction/drawtrianglecodegen.h
@@ -49,8 +49,13 @@ private:
 	SSAVec4i ProcessPixel32(SSAVec4i bg, SSAInt *varying);
 	SSAInt ProcessPixel8(SSAInt bg, SSAInt *varying);
 
-	SSAVec4i TranslateSample(SSAInt uvoffset);
-	SSAVec4i Sample(SSAInt uvoffset);
+	SSAVec4i TranslateSample32(SSAInt uvoffset);
+	SSAInt TranslateSample8(SSAInt uvoffset);
+	SSAVec4i Sample32(SSAInt uvoffset);
+	SSAInt Sample8(SSAInt uvoffset);
+	SSAInt Shade8(SSAInt c);
+	SSAVec4i ToBgra(SSAInt index);
+	SSAInt ToPal8(SSAVec4i c);
 
 	void SetStencilBlock(SSAInt block);
 	void StencilSet(SSAInt x, SSAInt y, SSAUByte value);
@@ -109,9 +114,7 @@ private:
 
 	SSAUBytePtr Colormaps;
 	SSAUBytePtr RGB32k;
-	SSAIntPtr Col2RGB8;
-	SSAIntPtr Col2RGB8_LessPrecision;
-	SSAIntPtr Col2RGB8_Inverse;
+	SSAUBytePtr BaseColors;
 
 	SSAWorkerThread thread;
 
diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp
index 869a24c2c..dea72a1ac 100644
--- a/src/r_compiler/llvmdrawers.cpp
+++ b/src/r_compiler/llvmdrawers.cpp
@@ -127,7 +127,7 @@ LLVMDrawers *LLVMDrawers::Instance()
 
 LLVMDrawersImpl::LLVMDrawersImpl()
 {
-	int version = 3; // Increment this number if the drawer codegen is modified (forces recreation of the module).
+	int version = 4; // Increment this number if the drawer codegen is modified (forces recreation of the module).
 	std::string targetCPU = mProgram.GetTargetCPU();
 	bool loaded = mProgram.LoadCachedModule(version, targetCPU);
 	if (!loaded)
@@ -584,9 +584,7 @@ llvm::Type *LLVMDrawersImpl::GetTriDrawTriangleArgs(llvm::LLVMContext &context)
 	elements.push_back(llvm::Type::getInt32PtrTy(context)); // uint32_t *subsectorGBuffer;
 	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *colormaps;
 	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *RGB32k;
-	elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8;
-	elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_LessPrecision;
-	elements.push_back(llvm::Type::getInt32PtrTy(context)); // const uint32_t *Col2RGB8_Inverse;
+	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *BaseColors;
 	return llvm::StructType::create(context, elements, "TriDrawTriangle", false)->getPointerTo();
 }
 
diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h
index 873f98c73..1cbafa11a 100644
--- a/src/r_compiler/llvmdrawers.h
+++ b/src/r_compiler/llvmdrawers.h
@@ -260,9 +260,7 @@ struct TriDrawTriangleArgs
 	uint32_t *subsectorGBuffer;
 	const uint8_t *colormaps;
 	const uint8_t *RGB32k;
-	const uint32_t *Col2RGB8;
-	const uint32_t *Col2RGB8_LessPrecision;
-	const uint32_t *Col2RGB8_Inverse;
+	const uint8_t *BaseColors;
 };
 
 enum class TriDrawVariant
diff --git a/src/r_poly_particle.cpp b/src/r_poly_particle.cpp
index 619b3e146..9b1dbaf1c 100644
--- a/src/r_poly_particle.cpp
+++ b/src/r_poly_particle.cpp
@@ -82,8 +82,18 @@ void RenderPolyParticle::Render(const TriMatrix &worldToClip, particle_t *partic
 		uniforms.flags = 0;
 	}
 	uniforms.subsectorDepth = subsectorDepth;
-	uint32_t alpha = particle->trans;
-	uniforms.color = (alpha << 24) | (particle->color & 0xffffff);
+
+	if (r_swtruecolor)
+	{
+		uint32_t alpha = particle->trans;
+		uniforms.color = (alpha << 24) | (particle->color & 0xffffff);
+	}
+	else
+	{
+		uniforms.color = ((uint32_t)particle->color) >> 24;
+		uniforms.srcalpha = particle->trans;
+		uniforms.destalpha = 255 - particle->trans;
+	}
 
 	PolyDrawArgs args;
 	args.uniforms = uniforms;
diff --git a/src/r_poly_triangle.cpp b/src/r_poly_triangle.cpp
index a00ea90eb..2ba3336d1 100644
--- a/src/r_poly_triangle.cpp
+++ b/src/r_poly_triangle.cpp
@@ -112,9 +112,7 @@ void PolyTriangleDrawer::draw_arrays(const PolyDrawArgs &drawargs, TriDrawVarian
 	args.subsectorGBuffer = PolySubsectorGBuffer::Instance()->Values();
 	args.colormaps = drawargs.colormaps;
 	args.RGB32k = RGB32k.All;
-	args.Col2RGB8 = (const uint32_t*)Col2RGB8;
-	args.Col2RGB8_Inverse = (const uint32_t*)Col2RGB8_Inverse;
-	args.Col2RGB8_LessPrecision = (const uint32_t*)Col2RGB8_LessPrecision;
+	args.BaseColors = (const uint8_t *)GPalette.BaseColors;
 
 	bool ccw = drawargs.ccw;
 	const TriVertex *vinput = drawargs.vinput;