diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp index 116744f1c..601358274 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.cpp +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.cpp @@ -11,7 +11,7 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) +void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(); source = args[0][1].load(); @@ -21,7 +21,8 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa pitch = args[0][5].load(); count = args[0][6].load(); dest_y = args[0][7].load(); - iscale = args[0][8].load(); + if (method == DrawColumnMethod::Normal) + iscale = args[0][8].load(); texturefrac = args[0][9].load(); light = args[0][10].load(); color = SSAVec4i::unpack(args[0][11].load()); @@ -46,109 +47,148 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa thread.num_cores = thread_data[0][1].load(); thread.pass_start_y = thread_data[0][2].load(); thread.pass_end_y = thread_data[0][3].load(); + thread.temp = thread_data[0][4].load(); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); count = count_for_thread(dest_y, count, thread); dest = dest_for_thread(dest_y, pitch, dest, thread); pitch = pitch * thread.num_cores; - stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); - iscale = iscale * thread.num_cores; + if (method == DrawColumnMethod::Normal) + { + stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); + iscale = iscale * thread.num_cores; + } + else + { + source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4]; + } SSAIfBlock branch; branch.if_block(is_simple_shade); - Loop(variant, true); + Loop(variant, method, true); branch.else_block(); - Loop(variant, false); + Loop(variant, method, false); branch.end_block(); } -void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade) +void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade) { + SSAInt sincr; + if (method != DrawColumnMethod::Normal) + sincr = thread.num_cores * 4; + stack_index.store(SSAInt(0)); { SSAForBlock loop; SSAInt index = stack_index.load(); loop.loop_block(index < count); - SSAInt frac = stack_frac.load(); - - SSAInt offset = index * pitch * 4; - SSAVec4i bgcolor = dest[offset].load_vec4ub(); - - SSAInt alpha, inv_alpha; - SSAVec4i outcolor; - switch (variant) + SSAInt sample_index, frac; + if (method == DrawColumnMethod::Normal) { - default: - case DrawColumnVariant::Draw: - outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade)); - break; - case DrawColumnVariant::DrawAdd: - case DrawColumnVariant::DrawAddClamp: - outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawShaded: - alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4; - inv_alpha = 256 - alpha; - outcolor = blend_add(color, bgcolor, alpha, inv_alpha); - break; - case DrawColumnVariant::DrawSubClamp: - outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawRevSubClamp: - outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawTranslated: - outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade)); - break; - case DrawColumnVariant::DrawTlatedAdd: - case DrawColumnVariant::DrawAddClampTranslated: - outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawSubClampTranslated: - outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::DrawRevSubClampTranslated: - outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::Fill: - outcolor = blend_copy(color); - break; - case DrawColumnVariant::FillAdd: - alpha = srccolor[3]; - alpha = alpha + (alpha >> 7); - inv_alpha = 256 - alpha; - outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha); - break; - case DrawColumnVariant::FillAddClamp: - outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::FillSubClamp: - outcolor = blend_sub(srccolor, bgcolor, srcalpha, destalpha); - break; - case DrawColumnVariant::FillRevSubClamp: - outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha); - break; + frac = stack_frac.load(); + sample_index = frac >> FRACBITS; + } + else + { + sample_index = index * sincr * 4; } - dest[offset].store_vec4ub(outcolor); + SSAInt offset = index * pitch * 4; + SSAVec4i bgcolor[4]; + + int numColumns = (method == DrawColumnMethod::Rt4) ? 4 : 1; + + if (numColumns == 4) + { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + bgcolor[0] = SSAVec4i::extendlo(bg0); + bgcolor[1] = SSAVec4i::extendhi(bg0); + bgcolor[2] = SSAVec4i::extendlo(bg1); + bgcolor[3] = SSAVec4i::extendhi(bg1); + } + else + { + bgcolor[0] = dest[offset].load_vec4ub(); + } + + SSAVec4i outcolor[4]; + for (int i = 0; i < numColumns; i++) + outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade); + + if (numColumns == 4) + { + SSAVec16ub packedcolor(SSAVec8s(outcolor[0], outcolor[1]), SSAVec8s(outcolor[2], outcolor[3])); + dest[offset].store_unaligned_vec16ub(packedcolor); + } + else + { + dest[offset].store_vec4ub(outcolor[0]); + } stack_index.store(index + 1); - stack_frac.store(frac + iscale); + if (method == DrawColumnMethod::Normal) + stack_frac.store(frac + iscale); loop.end_block(); } } -SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac) +SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade) +{ + SSAInt alpha, inv_alpha; + switch (variant) + { + default: + case DrawColumnVariant::DrawCopy: + return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub()); + case DrawColumnVariant::Draw: + return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawAdd: + case DrawColumnVariant::DrawAddClamp: + return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawShaded: + alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4; + inv_alpha = 256 - alpha; + return blend_add(color, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::DrawSubClamp: + return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClamp: + return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawTranslated: + return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade)); + case DrawColumnVariant::DrawTlatedAdd: + case DrawColumnVariant::DrawAddClampTranslated: + return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawSubClampTranslated: + return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::DrawRevSubClampTranslated: + return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha); + case DrawColumnVariant::Fill: + return blend_copy(color); + case DrawColumnVariant::FillAdd: + alpha = srccolor[3]; + alpha = alpha + (alpha >> 7); + inv_alpha = 256 - alpha; + return blend_add(srccolor, bgcolor, alpha, inv_alpha); + case DrawColumnVariant::FillAddClamp: + return blend_add(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillSubClamp: + return blend_sub(srccolor, bgcolor, srcalpha, destalpha); + case DrawColumnVariant::FillRevSubClamp: + return blend_revsub(srccolor, bgcolor, srcalpha, destalpha); + } +} + +SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index) { - SSAInt sample_index = frac >> FRACBITS; return colormap[source[sample_index].load().zext_int()].load().zext_int(); } -SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac) +SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index) { - SSAInt sample_index = frac >> FRACBITS; return translation[source[sample_index].load().zext_int()].load().zext_int(); } diff --git a/src/r_compiler/fixedfunction/drawcolumncodegen.h b/src/r_compiler/fixedfunction/drawcolumncodegen.h index 488c36295..675a5ea67 100644 --- a/src/r_compiler/fixedfunction/drawcolumncodegen.h +++ b/src/r_compiler/fixedfunction/drawcolumncodegen.h @@ -10,6 +10,7 @@ enum class DrawColumnVariant FillAddClamp, FillSubClamp, FillRevSubClamp, + DrawCopy, Draw, DrawAdd, DrawTranslated, @@ -23,13 +24,21 @@ enum class DrawColumnVariant DrawRevSubClampTranslated }; +enum class DrawColumnMethod +{ + Normal, + Rt1, + Rt4 +}; + class DrawColumnCodegen : public DrawerCodegen { public: - void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); + void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data); private: - void Loop(DrawColumnVariant variant, bool isSimpleShade); + void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade); + SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade); SSAInt ColormapSample(SSAInt frac); SSAInt TranslateSample(SSAInt frac); SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 17b36234d..27dc6f21d 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -25,6 +25,7 @@ public: SSAInt num_cores; SSAInt pass_start_y; SSAInt pass_end_y; + SSAUBytePtr temp; }; class SSAShadeConstants diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 6ab4f5a4f..3108b8c6a 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -47,7 +47,7 @@ public: LLVMDrawersImpl(); private: - void CodegenDrawColumn(const char *name, DrawColumnVariant variant); + void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method); void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); @@ -84,22 +84,36 @@ LLVMDrawers *LLVMDrawers::Instance() LLVMDrawersImpl::LLVMDrawersImpl() { - CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); - CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); - CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); - CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); - CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); - CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); - CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); - CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); - CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); - CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); - CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); - CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); - CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); - CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); - CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); - CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); + CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal); + CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1); + CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4); + CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); @@ -129,15 +143,29 @@ LLVMDrawersImpl::LLVMDrawersImpl() FillColumnRevSubClamp = mProgram.GetProcAddress("FillColumnRevSubClamp"); DrawColumn = mProgram.GetProcAddress("DrawColumn"); DrawColumnAdd = mProgram.GetProcAddress("DrawColumnAdd"); - DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); - DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); DrawColumnShaded = mProgram.GetProcAddress("DrawColumnShaded"); DrawColumnAddClamp = mProgram.GetProcAddress("DrawColumnAddClamp"); - DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); DrawColumnSubClamp = mProgram.GetProcAddress("DrawColumnSubClamp"); - DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); DrawColumnRevSubClamp = mProgram.GetProcAddress("DrawColumnRevSubClamp"); + DrawColumnTranslated = mProgram.GetProcAddress("DrawColumnTranslated"); + DrawColumnTlatedAdd = mProgram.GetProcAddress("DrawColumnTlatedAdd"); + DrawColumnAddClampTranslated = mProgram.GetProcAddress("DrawColumnAddClampTranslated"); + DrawColumnSubClampTranslated = mProgram.GetProcAddress("DrawColumnSubClampTranslated"); DrawColumnRevSubClampTranslated = mProgram.GetProcAddress("DrawColumnRevSubClampTranslated"); + DrawColumnRt1 = mProgram.GetProcAddress("DrawColumnRt1"); + DrawColumnRt1Copy = mProgram.GetProcAddress("DrawColumnRt1Copy"); + DrawColumnRt1Add = mProgram.GetProcAddress("DrawColumnRt1Add"); + DrawColumnRt1Shaded = mProgram.GetProcAddress("DrawColumnRt1Shaded"); + DrawColumnRt1AddClamp = mProgram.GetProcAddress("DrawColumnRt1AddClamp"); + DrawColumnRt1SubClamp = mProgram.GetProcAddress("DrawColumnRt1SubClamp"); + DrawColumnRt1RevSubClamp = mProgram.GetProcAddress("DrawColumnRt1RevSubClamp"); + DrawColumnRt4 = mProgram.GetProcAddress("DrawColumnRt4"); + DrawColumnRt4Copy = mProgram.GetProcAddress("DrawColumnRt4Copy"); + DrawColumnRt4Add = mProgram.GetProcAddress("DrawColumnRt4Add"); + DrawColumnRt4Shaded = mProgram.GetProcAddress("DrawColumnRt4Shaded"); + DrawColumnRt4AddClamp = mProgram.GetProcAddress("DrawColumnRt4AddClamp"); + DrawColumnRt4SubClamp = mProgram.GetProcAddress("DrawColumnRt4SubClamp"); + DrawColumnRt4RevSubClamp = mProgram.GetProcAddress("DrawColumnRt4RevSubClamp"); DrawSpan = mProgram.GetProcAddress("DrawSpan"); DrawSpanMasked = mProgram.GetProcAddress("DrawSpanMasked"); DrawSpanTranslucent = mProgram.GetProcAddress("DrawSpanTranslucent"); @@ -160,7 +188,7 @@ LLVMDrawersImpl::LLVMDrawersImpl() mProgram.StopLogFatalErrors(); } -void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant) +void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method) { llvm::IRBuilder<> builder(mProgram.context()); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); @@ -171,7 +199,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari function.create_public(); DrawColumnCodegen codegen; - codegen.Generate(variant, function.parameter(0), function.parameter(1)); + codegen.Generate(variant, method, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -310,6 +338,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex std::vector elements; for (int i = 0; i < 4; i++) elements.push_back(llvm::Type::getInt32Ty(context)); + elements.push_back(llvm::Type::getInt8PtrTy(context)); return llvm::StructType::get(context, elements, false)->getPointerTo(); } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 2ce4c5230..549825e4f 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -7,6 +7,7 @@ struct WorkerThreadData int32_t num_cores; int32_t pass_start_y; int32_t pass_end_y; + uint32_t *temp; }; struct DrawWallArgs @@ -122,20 +123,34 @@ public: void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; - void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; + void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 43075d0a6..b875bd413 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -334,6 +334,7 @@ public: class name##LLVMCommand : public base \ { \ public: \ + using base::base; \ void Execute(DrawerThread *thread) override \ { \ WorkerThreadData d = ThreadData(thread); \ diff --git a/src/r_drawt_rgba.cpp b/src/r_drawt_rgba.cpp index 45bd5c029..a73ba643c 100644 --- a/src/r_drawt_rgba.cpp +++ b/src/r_drawt_rgba.cpp @@ -43,6 +43,7 @@ #include "r_things.h" #include "v_video.h" #include "r_draw_rgba.h" +#include "r_compiler/llvmdrawers.h" #ifndef NO_SSE #include #endif @@ -89,6 +90,89 @@ extern unsigned int *horizspan[4]; ///////////////////////////////////////////////////////////////////////////// +class DrawColumnRt1LLVMCommand : public DrawerCommand +{ +protected: + DrawColumnArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + d.temp = thread->dc_temp_rgba; + return d; + } + +public: + DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh) + { + args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx; + args.source = nullptr; + args.colormap = dc_colormap; + args.translation = dc_translation; + args.basecolors = (const uint32_t *)GPalette.BaseColors; + args.pitch = dc_pitch; + args.count = yh - yl + 1; + args.dest_y = yl; + args.iscale = dc_iscale; + args.texturefrac = hx; + args.light = LightBgra::calc_light_multiplier(dc_light); + args.color = LightBgra::shade_pal_index_simple(dc_color, args.light); + args.srccolor = dc_srccolor_bgra; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawColumnArgs::simple_shade; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->DrawColumnRt1(&args, &d); + } +}; + +#define DECLARE_DRAW_COMMAND(name, func, base) \ +class name##LLVMCommand : public base \ +{ \ +public: \ + using base::base; \ + void Execute(DrawerThread *thread) override \ + { \ + WorkerThreadData d = ThreadData(thread); \ + LLVMDrawers::Instance()->func(&args, &d); \ + } \ +}; + +DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand); +DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand); + +///////////////////////////////////////////////////////////////////////////// + class DrawerRt1colCommand : public DrawerCommand { public: @@ -756,7 +840,7 @@ public: // Copies one span at hx to the screen at sx. void rt_copy1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Copies all four spans to the screen starting at sx. @@ -772,17 +856,13 @@ void rt_copy4cols_rgba (int sx, int yl, int yh) // Maps one span at hx to the screen at sx. void rt_map1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Maps all four spans to the screen starting at sx. void rt_map4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) @@ -812,17 +892,13 @@ void rt_tlate4cols_rgba (int sx, int yl, int yh) // Adds one span at hx to the screen at sx without clamping. void rt_add1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx without clamping. void rt_add4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx without clamping. @@ -842,33 +918,25 @@ void rt_tlateadd4cols_rgba(int sx, int yl, int yh) // Shades one span at hx to the screen at sx. void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Shades all four spans to the screen starting at sx. void rt_shaded4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Adds one span at hx to the screen at sx with clamping. void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Adds all four spans to the screen starting at sx with clamping. void rt_addclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and adds one span at hx to the screen at sx with clamping. @@ -888,17 +956,13 @@ void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh) // Subtracts one span at hx to the screen at sx with clamping. void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans to the screen starting at sx with clamping. void rt_subclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx to the screen at sx with clamping. @@ -918,17 +982,13 @@ void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh) // Subtracts one span at hx from the screen at sx with clamping. void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) { - DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); + DrawerCommandQueue::QueueCommand(hx, sx, yl, yh); } // Subtracts all four spans from the screen starting at sx with clamping. void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) { -#ifdef NO_SSE - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#else - DrawerCommandQueue::QueueCommand(sx, yl, yh); -#endif + DrawerCommandQueue::QueueCommand(0, sx, yl, yh); } // Translates and subtracts one span at hx from the screen at sx with clamping.