From c1e859dbca3e6301bf570bf1bdc467f90c415d4e Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 7 Oct 2016 03:38:43 +0200 Subject: [PATCH] Added codegen for column drawers --- .../fixedfunction/drawcolumncodegen.cpp | 147 +++- .../fixedfunction/drawcolumncodegen.h | 57 +- src/r_compiler/llvmdrawers.cpp | 86 +++ src/r_compiler/llvmdrawers.h | 51 ++ src/r_compiler/ssa/ssa_ubyte.cpp | 6 + src/r_compiler/ssa/ssa_ubyte.h | 4 + src/r_compiler/ssa/ssa_ubyte_ptr.cpp | 16 +- src/r_compiler/ssa/ssa_vec4i.cpp | 13 + src/r_compiler/ssa/ssa_vec4i.h | 1 + src/r_draw_rgba.cpp | 718 +++--------------- 10 files changed, 449 insertions(+), 650 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 67d801162b..116744f1cb 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -11,6 +11,151 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) { + dest = args[0][0].load(); + source = args[0][1].load(); + colormap = args[0][2].load(); + translation = args[0][3].load(); + basecolors = args[0][4].load(); + pitch = args[0][5].load(); + count = args[0][6].load(); + dest_y = args[0][7].load(); + iscale = args[0][8].load(); + texturefrac = args[0][9].load(); + light = args[0][10].load(); + color = SSAVec4i::unpack(args[0][11].load()); + srccolor = SSAVec4i::unpack(args[0][12].load()); + srcalpha = args[0][13].load(); + destalpha = args[0][14].load(); + SSAShort light_alpha = args[0][15].load(); + SSAShort light_red = args[0][16].load(); + SSAShort light_green = args[0][17].load(); + SSAShort light_blue = args[0][18].load(); + SSAShort fade_alpha = args[0][19].load(); + SSAShort fade_red = args[0][20].load(); + SSAShort fade_green = args[0][21].load(); + SSAShort fade_blue = args[0][22].load(); + SSAShort desaturate = args[0][23].load(); + SSAInt flags = args[0][24].load(); + shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int()); + shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); + shade_constants.desaturate = desaturate.zext_int(); + + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = thread_data[0][3].load(); + + is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); + + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + pitch = pitch * thread.num_cores; + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + + SSAIfBlock branch; + branch.if_block(is_simple_shade); + Loop(variant, true); + branch.else_block(); + Loop(variant, false); + branch.end_block(); +} + +void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade) +{ + stack_index.store(SSAInt(0)); + { + SSAForBlock loop; + SSAInt index = stack_index.load(); + loop.loop_block(index < count); + + SSAInt frac = stack_frac.load(); + + SSAInt offset = index * pitch * 4; + SSAVec4i bgcolor = dest[offset].load_vec4ub(); + + SSAInt alpha, inv_alpha; + SSAVec4i outcolor; + switch (variant) + { + default: + case DrawColumnVariant::Draw: + outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade)); + break; + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + outcolor = blend_add(color, bgcolor, alpha, inv_alpha); + break; + case DrawColumnVariant::DrawSubClamp: + outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawRevSubClamp: + outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawTranslated: + outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade)); + break; + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawSubClampTranslated: + outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::DrawRevSubClampTranslated: + outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::Fill: + outcolor = blend_copy(color); + break; + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha); + break; + case DrawColumnVariant::FillAddClamp: + outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::FillSubClamp: + outcolor = blend_sub(srccolor, bgcolor, srcalpha, destalpha); + break; + case DrawColumnVariant::FillRevSubClamp: + outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + break; + } + + dest[offset].store_vec4ub(outcolor); + + stack_index.store(index + 1); + stack_frac.store(frac + iscale); + loop.end_block(); + } +} + +SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac) +{ + SSAInt sample_index = frac >> FRACBITS; + return colormap[source[sample_index].load().zext_int()].load().zext_int(); +} + +SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac) +{ + SSAInt sample_index = frac >> FRACBITS; + return translation[source[sample_index].load().zext_int()].load().zext_int(); +} + +SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade) +{ + if (isSimpleShade) + return shade_pal_index_simple(palIndex, light, basecolors); + else + return shade_pal_index_advanced(palIndex, light, shade_constants, basecolors); } diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 0749def7f5..488c36295f 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -5,22 +5,53 @@ enum class DrawColumnVariant { - Opaque, - Fuzz, - Add, - Translated, - TlatedAdd, - Shaded, - AddClamp, - AddClampTranslated, - SubClamp, - SubClampTranslated, - RevSubClamp, - RevSubClampTranslated + Fill, + FillAdd, + FillAddClamp, + FillSubClamp, + FillRevSubClamp, + Draw, + DrawAdd, + DrawTranslated, + DrawTlatedAdd, + DrawShaded, + DrawAddClamp, + DrawAddClampTranslated, + DrawSubClamp, + DrawSubClampTranslated, + DrawRevSubClamp, + DrawRevSubClampTranslated }; class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, SSAValue args); + void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); + +private: + void Loop(DrawColumnVariant variant, bool isSimpleShade); + SSAInt ColormapSample(SSAInt frac); + SSAInt TranslateSample(SSAInt frac); + SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); + + SSAStack stack_index, stack_frac; + + SSAUBytePtr dest; + SSAUBytePtr source; + SSAUBytePtr colormap; + SSAUBytePtr translation; + SSAUBytePtr basecolors; + SSAInt pitch; + SSAInt count; + SSAInt dest_y; + SSAInt iscale; + SSAInt texturefrac; + SSAInt light; + SSAVec4i color; + SSAVec4i srccolor; + SSAInt srcalpha; + SSAInt destalpha; + SSABool is_simple_shade; + SSAShadeConstants shade_constants; + SSAWorkerThread thread; }; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 7691af35b1..6ab4f5a4fd 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,9 +47,11 @@ public: LLVMDrawersImpl(); private: + void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); + static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); @@ -82,6 +84,22 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -104,6 +122,22 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.engine()->finalizeObject(); mProgram.modulePassManager()->run(*mProgram.module()); + FillColumn = mProgram.GetProcAddress("FillColumn"); + FillColumnAdd = mProgram.GetProcAddress("FillColumnAdd"); + FillColumnAddClamp = mProgram.GetProcAddress("FillColumnAddClamp"); + FillColumnSubClamp = mProgram.GetProcAddress("FillColumnSubClamp"); + FillColumnRevSubClamp = mProgram.GetProcAddress("FillColumnRevSubClamp"); + DrawColumn = mProgram.GetProcAddress("DrawColumn"); + DrawColumnAdd = mProgram.GetProcAddress("DrawColumnAdd"); + DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); + DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); + DrawColumnShaded = mProgram.GetProcAddress("DrawColumnShaded"); + DrawColumnAddClamp = mProgram.GetProcAddress("DrawColumnAddClamp"); + DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); + DrawColumnSubClamp = mProgram.GetProcAddress("DrawColumnSubClamp"); + DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); + DrawColumnRevSubClamp = mProgram.GetProcAddress("DrawColumnRevSubClamp"); + DrawColumnRevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRevSubClampTranslated"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); @@ -126,6 +160,27 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.StopLogFatalErrors(); } +void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant) +{ + llvm::IRBuilder<> builder(mProgram.context()); + SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); + + SSAFunction function(name); + function.add_parameter(GetDrawColumnArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); + function.create_public(); + + DrawColumnCodegen codegen; + codegen.Generate(variant, function.parameter(0), function.parameter(1)); + + builder.CreateRetVoid(); + + if (llvm::verifyFunction(*function.func)) + I_FatalError("verifyFunction failed for " __FUNCTION__); + + mProgram.functionPassManager()->run(*function.func); +} + void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) { llvm::IRBuilder<> builder(mProgram.context()); @@ -167,6 +222,37 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, mProgram.functionPassManager()->run(*function.func); } +llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context) +{ + std::vector elements; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation; + elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count; + elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue; + elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate; + elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags; + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) { std::vector elements; diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index b1039cf496..2ce4c52306 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -77,6 +77,40 @@ struct DrawSpanArgs }; }; +struct DrawColumnArgs +{ + uint32_t *dest; + const uint8_t *source; + uint8_t *colormap; + uint8_t *translation; + const uint32_t *basecolors; + int32_t pitch; + int32_t count; + int32_t dest_y; + uint32_t iscale; + uint32_t texturefrac; + uint32_t light; + uint32_t color; + uint32_t srccolor; + uint32_t srcalpha; + uint32_t destalpha; + + uint16_t light_alpha; + uint16_t light_red; + uint16_t light_green; + uint16_t light_blue; + uint16_t fade_alpha; + uint16_t fade_red; + uint16_t fade_green; + uint16_t fade_blue; + uint16_t desaturate; + uint32_t flags; + enum Flags + { + simple_shade = 1 + }; +}; + class LLVMDrawers { public: @@ -86,6 +120,23 @@ public: static void Destroy(); static LLVMDrawers *Instance(); + void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_compiler/ssa/ssa_ubyte.cpp b/src/r_compiler/ssa/ssa_ubyte.cpp index 3204d064d1..6fe9c3bb16 100644 --- a/src/r_compiler/ssa/ssa_ubyte.cpp +++ b/src/r_compiler/ssa/ssa_ubyte.cpp @@ -1,6 +1,7 @@ #include "r_compiler/llvm_include.h" #include "ssa_ubyte.h" +#include "ssa_int.h" #include "ssa_scope.h" SSAUByte::SSAUByte() @@ -24,6 +25,11 @@ llvm::Type *SSAUByte::llvm_type() return llvm::Type::getInt8Ty(SSAScope::context()); } +SSAInt SSAUByte::zext_int() +{ + return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint())); +} + SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) { return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_ubyte.h b/src/r_compiler/ssa/ssa_ubyte.h index ef878b3259..41ed3939be 100644 --- a/src/r_compiler/ssa/ssa_ubyte.h +++ b/src/r_compiler/ssa/ssa_ubyte.h @@ -4,6 +4,8 @@ namespace llvm { class Value; } namespace llvm { class Type; } +class SSAInt; + class SSAUByte { public: @@ -13,6 +15,8 @@ public: static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } static llvm::Type *llvm_type(); + SSAInt zext_int(); + llvm::Value *v; }; diff --git a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp index 98bf27c462..34de0ab889 100644 --- a/src/r_compiler/ssa/ssa_ubyte_ptr.cpp +++ b/src/r_compiler/ssa/ssa_ubyte_ptr.cpp @@ -30,22 +30,8 @@ SSAUByte SSAUBytePtr::load() const SSAVec4i SSAUBytePtr::load_vec4ub() const { - // _mm_cvtsi32_si128 as implemented by clang: SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); - llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); - v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); - SSAVec4i v4i = SSAVec4i::from_llvm(v); - - SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8 - return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 -/* - llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo(); - llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); - llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint()); - return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type)); -*/ + return SSAVec4i::unpack(i32); } SSAVec16ub SSAUBytePtr::load_vec16ub() const diff --git a/src/r_compiler/ssa/ssa_vec4i.cpp b/src/r_compiler/ssa/ssa_vec4i.cpp index 3b508412f3..3be0ec194a 100644 --- a/src/r_compiler/ssa/ssa_vec4i.cpp +++ b/src/r_compiler/ssa/ssa_vec4i.cpp @@ -97,6 +97,19 @@ llvm::Type *SSAVec4i::llvm_type() return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); } +SSAVec4i SSAVec4i::unpack(SSAInt i32) +{ + // _mm_cvtsi32_si128 as implemented by clang: + llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint()); + v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint()); + SSAVec4i v4i = SSAVec4i::from_llvm(v); + + SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)); // _mm_unpacklo_epi8 + return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16 +} + SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) { return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_vec4i.h b/src/r_compiler/ssa/ssa_vec4i.h index 89cda16465..f8ef92f1e2 100644 --- a/src/r_compiler/ssa/ssa_vec4i.h +++ b/src/r_compiler/ssa/ssa_vec4i.h @@ -24,6 +24,7 @@ public: SSAVec4i insert(SSAInt index, SSAInt value); SSAVec4i insert(int index, SSAInt value); SSAVec4i insert(int index, int value); + static SSAVec4i unpack(SSAInt value); static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index c5b1b478e6..7da2f183f8 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -396,56 +396,6 @@ public: } }; -class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->mvlinec4(&args, &d); - } -}; - -class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_add(&args, &d); - } -}; - -class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); - } -}; - -class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); - } -}; - -class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); - } -}; - class DrawWall1LLVMCommand : public DrawerCommand { protected: @@ -500,575 +450,101 @@ public: } }; -class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand +class DrawColumnLLVMCommand : public DrawerCommand { +protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + public: + DrawColumnLLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.source = dc_source; + args.colormap = dc_colormap; + args.translation = dc_translation; + args.basecolors = (const uint32_t *)GPalette.BaseColors; + args.pitch = dc_pitch; + args.count = dc_count; + args.dest_y = _dest_y; + args.iscale = dc_iscale; + args.texturefrac = dc_texturefrac; + args.light = LightBgra::calc_light_multiplier(dc_light); + args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); + args.srccolor = dc_srccolor_bgra; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawColumnArgs::simple_shade; + } + void Execute(DrawerThread *thread) override { WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->mvlinec1(&args, &d); + LLVMDrawers::Instance()->DrawColumn(&args, &d); } }; -class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_add(&args, &d); - } +#define DECLARE_DRAW_COMMAND(name, func, base) \ +class name##LLVMCommand : public base \ +{ \ +public: \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + LLVMDrawers::Instance()->func(&args, &d); \ + } \ }; -class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); - } -}; +//DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand); -class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); - } -}; - -class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand -{ -public: - void Execute(DrawerThread *thread) override - { - WorkerThreadData d = ThreadData(thread); - LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); - } -}; +DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand); +DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand); ///////////////////////////////////////////////////////////////////////////// -class DrawerColumnCommand : public DrawerCommand -{ -public: - int _count; - BYTE * RESTRICT _dest; - int _pitch; - DWORD _iscale; - DWORD _texturefrac; - - DrawerColumnCommand() - { - _count = dc_count; - _dest = dc_dest; - _iscale = dc_iscale; - _texturefrac = dc_texturefrac; - _pitch = dc_pitch; - } - - class LoopIterator - { - public: - int count; - uint32_t *dest; - int pitch; - fixed_t fracstep; - fixed_t frac; - - LoopIterator(DrawerColumnCommand *command, DrawerThread *thread) - { - count = thread->count_for_thread(command->_dest_y, command->_count); - if (count <= 0) - return; - - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - } - - uint32_t sample_index() - { - return frac >> FRACBITS; - } - - explicit operator bool() - { - return count > 0; - } - - bool next() - { - dest += pitch; - frac += fracstep; - return (--count) != 0; - } - }; -}; - -class DrawColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _light; - const BYTE * RESTRICT _source; - ShadeConstants _shade_constants; - BYTE * RESTRICT _colormap; - -public: - DrawColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _source = dc_source; - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class FillColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _color; - -public: - FillColumnRGBACommand() - { - uint32_t light = LightBgra::calc_light_multiplier(dc_light); - _color = LightBgra::shade_pal_index_simple(dc_color, light); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::copy(_color); - } while (loop.next()); - } -}; - -class FillAddColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - -public: - FillAddColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - - uint32_t alpha = APART(_srccolor); - alpha += alpha >> 7; - - do - { - *loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha); - } while (loop.next()); - } -}; - -class FillAddClampColumnRGBACommand : public DrawerColumnCommand -{ - int _color; - uint32_t _srccolor; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - FillAddClampColumnRGBACommand() - { - _color = dc_color; - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class FillSubClampColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - FillSubClampColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand -{ - uint32_t _srccolor; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - FillRevSubClampColumnRGBACommand() - { - _srccolor = dc_srccolor_bgra; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - *loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawAddColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _colormap; - -public: - DrawAddColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _colormap = dc_colormap; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - -public: - DrawTranslatedColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _translation = dc_translation; - _source = dc_source; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::copy(fg); - } while (loop.next()); - } -}; - -class DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand -{ - fixed_t _light; - ShadeConstants _shade_constants; - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawTlatedAddColumnRGBACommand() - { - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _translation = dc_translation; - _source = dc_source; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawShadedColumnRGBACommand : public DrawerColumnCommand -{ -private: - const BYTE * RESTRICT _source; - lighttable_t * RESTRICT _colormap; - uint32_t _color; - -public: - DrawShadedColumnRGBACommand() - { - _source = dc_source; - _colormap = dc_colormap; - _color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light)); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t alpha = clamp(_colormap[_source[loop.sample_index()]], 0, 64) * 4; - uint32_t inv_alpha = 256 - alpha; - *loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha); - } while (loop.next()); - } -}; - -class DrawAddClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawAddClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - BYTE * RESTRICT _translation; - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawAddClampTranslatedColumnRGBACommand() - { - _translation = dc_translation; - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawSubClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawSubClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawSubClampTranslatedColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - -public: - DrawRevSubClampColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - -class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand -{ - const BYTE * RESTRICT _source; - uint32_t _light; - ShadeConstants _shade_constants; - uint32_t _srcalpha; - uint32_t _destalpha; - BYTE * RESTRICT _translation; - -public: - DrawRevSubClampTranslatedColumnRGBACommand() - { - _source = dc_source; - _light = LightBgra::calc_light_multiplier(dc_light); - _shade_constants = dc_shade_constants; - _srcalpha = dc_srcalpha >> (FRACBITS - 8); - _destalpha = dc_destalpha >> (FRACBITS - 8); - _translation = dc_translation; - } - - void Execute(DrawerThread *thread) override - { - LoopIterator loop(this, thread); - if (!loop) return; - do - { - uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants); - *loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha); - } while (loop.next()); - } -}; - class DrawFuzzColumnRGBACommand : public DrawerCommand { int _x; @@ -1830,32 +1306,32 @@ void R_EndDrawerCommands() void R_DrawColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillAddClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_FillRevSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawFuzzColumn_rgba() @@ -1870,52 +1346,52 @@ void R_DrawFuzzColumn_rgba() void R_DrawAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawTlatedAddColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawShadedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawAddClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSubClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawRevSubClampTranslatedColumn_rgba() { - DrawerCommandQueue::QueueCommand(); + DrawerCommandQueue::QueueCommand(); } void R_DrawSpan_rgba()