From 28bb5da181535e5639d655efc44a0b177be5fa72 Mon Sep 17 00:00:00 2001 From: Magnus Norddahl Date: Fri, 30 Sep 2016 07:27:25 +0200 Subject: [PATCH] Hooked up LLVM wall drawers --- .../fixedfunction/drawercodegen.cpp | 29 +- src/r_compiler/fixedfunction/drawercodegen.h | 21 ++ .../fixedfunction/drawwallcodegen.cpp | 59 ++-- .../fixedfunction/drawwallcodegen.h | 5 +- src/r_compiler/llvmdrawers.cpp | 36 ++- src/r_compiler/llvmdrawers.h | 32 ++- src/r_compiler/ssa/ssa_int.cpp | 11 + src/r_compiler/ssa/ssa_int.h | 3 + src/r_draw_rgba.cpp | 261 ++++++++++++++++++ 9 files changed, 410 insertions(+), 47 deletions(-) diff --git a/src/r_compiler/fixedfunction/drawercodegen.cpp b/src/r_compiler/fixedfunction/drawercodegen.cpp index 5da858e27f..2cba501218 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.cpp +++ b/src/r_compiler/fixedfunction/drawercodegen.cpp @@ -10,6 +10,31 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" +SSABool DrawerCodegen::line_skipped_by_thread(SSAInt line, SSAWorkerThread thread) +{ + return line < thread.pass_start_y || line >= thread.pass_end_y || !(line % thread.num_cores == thread.core); +} + +SSAInt DrawerCodegen::skipped_by_thread(SSAInt first_line, SSAWorkerThread thread) +{ + SSAInt pass_skip = SSAInt::MAX(thread.pass_start_y - first_line, 0); + SSAInt core_skip = (thread.num_cores - (first_line + pass_skip - thread.core) % thread.num_cores) % thread.num_cores; + return pass_skip + core_skip; +} + +SSAInt DrawerCodegen::count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread) +{ + SSAInt lines_until_pass_end = SSAInt::MAX(thread.pass_end_y - first_line, 0); + count = SSAInt::MIN(count, lines_until_pass_end); + SSAInt c = (count - skipped_by_thread(first_line, thread) + thread.num_cores - 1) / thread.num_cores; + return SSAInt::MAX(c, 0); +} + +SSAUBytePtr DrawerCodegen::dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread) +{ + return dest[skipped_by_thread(first_line, thread) * pitch * 4]; +} + SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) { return 256 - (light >> (FRACBITS - 8)); @@ -105,8 +130,8 @@ SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt SSAVec4i p11 = col1[y1 * 4].load_vec4ub(); SSAInt inv_b = texturefracx; - SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; - SSAInt a = 16 - inv_a; + SSAInt a = (frac_y1 >> (FRACBITS - 4)) & 15; + SSAInt inv_a = 16 - a; SSAInt b = 16 - inv_b; return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8; diff --git a/src/r_compiler/fixedfunction/drawercodegen.h b/src/r_compiler/fixedfunction/drawercodegen.h index 9e0706ed1a..17b36234dc 100644 --- a/src/r_compiler/fixedfunction/drawercodegen.h +++ b/src/r_compiler/fixedfunction/drawercodegen.h @@ -18,6 +18,15 @@ #include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/llvm_include.h" +class SSAWorkerThread +{ +public: + SSAInt core; + SSAInt num_cores; + SSAInt pass_start_y; + SSAInt pass_end_y; +}; + class SSAShadeConstants { public: @@ -29,6 +38,18 @@ public: class DrawerCodegen { public: + // Checks if a line is rendered by this thread + SSABool line_skipped_by_thread(SSAInt line, SSAWorkerThread thread); + + // The number of lines to skip to reach the first line to be rendered by this thread + SSAInt skipped_by_thread(SSAInt first_line, SSAWorkerThread thread); + + // The number of lines to be rendered by this thread + SSAInt count_for_thread(SSAInt first_line, SSAInt count, SSAWorkerThread thread); + + // Calculate the dest address for the first line to be rendered by this thread + SSAUBytePtr dest_for_thread(SSAInt first_line, SSAInt pitch, SSAUBytePtr dest, SSAWorkerThread thread); + // LightBgra SSAInt calc_light_multiplier(SSAInt light); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp index 65b2224b57..0ca5377234 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp +++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp @@ -10,7 +10,7 @@ #include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_value.h" -void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args) +void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data) { dest = args[0][0].load(); source[0] = args[0][1].load(); @@ -60,24 +60,24 @@ void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAVal shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int()); shade_constants.desaturate = desaturate.zext_int(); + thread.core = thread_data[0][0].load(); + thread.num_cores = thread_data[0][1].load(); + thread.pass_start_y = thread_data[0][2].load(); + thread.pass_end_y = thread_data[0][3].load(); + is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade; is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter; - /* - count = thread->count_for_thread(command->_dest_y, command->_count); - fracstep = command->_iscale * thread->num_cores; - frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y); - texturefracx = command->_texturefracx; - dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest); - pitch = command->_pitch * thread->num_cores; - height = command->_textureheight; - one = ((0x80000000 + height - 1) / height) * 2 + 1; - */ + count = count_for_thread(dest_y, count, thread); + dest = dest_for_thread(dest_y, pitch, dest, thread); + + pitch = pitch * thread.num_cores; + int numColumns = fourColumns ? 4 : 1; for (int i = 0; i < numColumns; i++) { - stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y); - fracstep[i] = iscale[i];// * num_cores; + stack_frac[i].store(texturefrac[i] + iscale[i] * skipped_by_thread(dest_y, thread)); + fracstep[i] = iscale[i] * thread.num_cores; one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1; } @@ -113,16 +113,32 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim for (int i = 0; i < numColumns; i++) frac[i] = stack_frac[i].load(); - SSAInt offset = (dest_y + index) * pitch * 4; + SSAInt offset = index * pitch * 4; if (fourColumns) { + SSAVec16ub bg = dest[offset].load_unaligned_vec16ub(); + SSAVec8s bg0 = SSAVec8s::extendlo(bg); + SSAVec8s bg1 = SSAVec8s::extendhi(bg); + SSAVec4i bgcolors[4] = + { + SSAVec4i::extendlo(bg0), + SSAVec4i::extendhi(bg0), + SSAVec4i::extendlo(bg1), + SSAVec4i::extendhi(bg1) + }; + SSAVec4i colors[4]; + for (int i = 0; i < 4; i++) + colors[i] = Blend(Shade(Sample(frac[i], i, isNearestFilter), i, isSimpleShade), bgcolors[i], variant); + + SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); + dest[offset].store_unaligned_vec16ub(color); } else { SSAVec4i bgcolor = dest[offset].load_vec4ub(); - SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant); + SSAVec4i color = Blend(Shade(Sample(frac[0], 0, isNearestFilter), 0, isSimpleShade), bgcolor, variant); dest[offset].store_vec4ub(color); } @@ -133,10 +149,17 @@ void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSim } } -SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter) +SSAVec4i DrawWallCodegen::Sample(SSAInt frac, int index, bool isNearestFilter) { - // int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; } - return SSAVec4i(0); + if (isNearestFilter) + { + SSAInt sample_index = ((frac >> FRACBITS) * textureheight[index]) >> FRACBITS; + return source[index][sample_index * 4].load_vec4ub(); + } + else + { + return sample_linear(source[index], source2[index], texturefracx[index], frac, one[index], textureheight[index]); + } } SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade) diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h index eafc8cf697..0e1cce5fcf 100644 --- a/src/r_compiler/fixedfunction/drawwallcodegen.h +++ b/src/r_compiler/fixedfunction/drawwallcodegen.h @@ -16,12 +16,12 @@ enum class DrawWallVariant class DrawWallCodegen : public DrawerCodegen { public: - void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args); + void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args, SSAValue thread_data); private: void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade); void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter); - SSAVec4i Sample(SSAInt frac, bool isNearestFilter); + SSAVec4i Sample(SSAInt frac, int index, bool isNearestFilter); SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade); SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant); @@ -43,6 +43,7 @@ private: SSABool is_simple_shade; SSABool is_nearest_filter; SSAShadeConstants shade_constants; + SSAWorkerThread thread; SSAInt fracstep[4]; SSAInt one[4]; diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp index 57c3293bbd..60727744c8 100644 --- a/src/r_compiler/llvmdrawers.cpp +++ b/src/r_compiler/llvmdrawers.cpp @@ -51,6 +51,7 @@ private: static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); + static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); LLVMProgram mProgram; }; @@ -108,18 +109,18 @@ LLVMDrawersImpl::LLVMDrawersImpl() DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent"); DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp"); DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp"); - vlinec1 = mProgram.GetProcAddress("vlinec1"); - vlinec4 = mProgram.GetProcAddress("vlinec4"); - mvlinec1 = mProgram.GetProcAddress("mvlinec1"); - mvlinec4 = mProgram.GetProcAddress("mvlinec4"); - tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); - tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); - tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); - tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); - tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); - tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); - tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); - tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); + vlinec1 = mProgram.GetProcAddress("vlinec1"); + vlinec4 = mProgram.GetProcAddress("vlinec4"); + mvlinec1 = mProgram.GetProcAddress("mvlinec1"); + mvlinec4 = mProgram.GetProcAddress("mvlinec4"); + tmvline1_add = mProgram.GetProcAddress("tmvline1_add"); + tmvline4_add = mProgram.GetProcAddress("tmvline4_add"); + tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp"); + tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp"); + tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp"); + tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp"); + tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp"); + tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp"); mProgram.StopLogFatalErrors(); } @@ -151,10 +152,11 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, SSAFunction function(name); function.add_parameter(GetDrawWallArgsStruct(mProgram.context())); + function.add_parameter(GetWorkerThreadDataStruct(mProgram.context())); function.create_public(); DrawWallCodegen codegen; - codegen.Generate(variant, columns == 4, function.parameter(0)); + codegen.Generate(variant, columns == 4, function.parameter(0), function.parameter(1)); builder.CreateRetVoid(); @@ -216,6 +218,14 @@ llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context) return llvm::StructType::get(context, elements, false)->getPointerTo(); } +llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &context) +{ + std::vector elements; + for (int i = 0; i < 4; i++) + elements.push_back(llvm::Type::getInt32Ty(context)); + return llvm::StructType::get(context, elements, false)->getPointerTo(); +} + ///////////////////////////////////////////////////////////////////////////// namespace { static bool LogFatalErrors = false; } diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h index 92f7e9440c..b1039cf496 100644 --- a/src/r_compiler/llvmdrawers.h +++ b/src/r_compiler/llvmdrawers.h @@ -1,6 +1,14 @@ #pragma once +struct WorkerThreadData +{ + int32_t core; + int32_t num_cores; + int32_t pass_start_y; + int32_t pass_end_y; +}; + struct DrawWallArgs { uint32_t *dest; @@ -85,18 +93,18 @@ public: void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr; - void(*vlinec1)(const DrawWallArgs *) = nullptr; - void(*vlinec4)(const DrawWallArgs *) = nullptr; - void(*mvlinec1)(const DrawWallArgs *) = nullptr; - void(*mvlinec4)(const DrawWallArgs *) = nullptr; - void(*tmvline1_add)(const DrawWallArgs *) = nullptr; - void(*tmvline4_add)(const DrawWallArgs *) = nullptr; - void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr; - void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr; + void(*vlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*vlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec1)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*mvlinec4)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_add)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_addclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_subclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline1_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; + void(*tmvline4_revsubclamp)(const DrawWallArgs *, const WorkerThreadData *) = nullptr; private: static LLVMDrawers *Singleton; diff --git a/src/r_compiler/ssa/ssa_int.cpp b/src/r_compiler/ssa/ssa_int.cpp index 674f44350f..8d5a32e4c6 100644 --- a/src/r_compiler/ssa/ssa_int.cpp +++ b/src/r_compiler/ssa/ssa_int.cpp @@ -1,6 +1,7 @@ #include "ssa_int.h" #include "ssa_float.h" +#include "ssa_bool.h" #include "ssa_scope.h" #include "r_compiler/llvm_include.h" @@ -31,6 +32,16 @@ llvm::Type *SSAInt::llvm_type() return llvm::Type::getInt32Ty(SSAScope::context()); } +SSAInt SSAInt::MIN(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a < b).v, a.v, b.v, SSAScope::hint())); +} + +SSAInt SSAInt::MAX(SSAInt a, SSAInt b) +{ + return SSAInt::from_llvm(SSAScope::builder().CreateSelect((a > b).v, a.v, b.v, SSAScope::hint())); +} + SSAInt operator+(const SSAInt &a, const SSAInt &b) { return SSAInt::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); diff --git a/src/r_compiler/ssa/ssa_int.h b/src/r_compiler/ssa/ssa_int.h index 5e373c62e3..d928c41f2c 100644 --- a/src/r_compiler/ssa/ssa_int.h +++ b/src/r_compiler/ssa/ssa_int.h @@ -16,6 +16,9 @@ public: static SSAInt from_llvm(llvm::Value *v) { return SSAInt(v); } static llvm::Type *llvm_type(); + static SSAInt MIN(SSAInt a, SSAInt b); + static SSAInt MAX(SSAInt a, SSAInt b); + llvm::Value *v; }; diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp index 8a0a6871a0..c76c2c3c59 100644 --- a/src/r_draw_rgba.cpp +++ b/src/r_draw_rgba.cpp @@ -404,6 +404,219 @@ public: ///////////////////////////////////////////////////////////////////////////// +class DrawWall4LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall4LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.count = dc_count; + args.pitch = dc_pitch; + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + for (int i = 0; i < 4; i++) + { + args.texturefrac[i] = vplce[i]; + args.iscale[i] = vince[i]; + args.texturefracx[i] = buftexturefracx[i]; + args.textureheight[i] = bufheight[i]; + args.source[i] = (const uint32_t *)bufplce[i]; + args.source2[i] = (const uint32_t *)bufplce2[i]; + args.light[i] = LightBgra::calc_light_multiplier(palookuplight[i]); + } + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec4(&args, &d); + } +}; + +class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec4(&args, &d); + } +}; + +class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_add(&args, &d); + } +}; + +class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d); + } +}; + +class DrawWall1LLVMCommand : public DrawerCommand +{ +protected: + DrawWallArgs args; + + WorkerThreadData ThreadData(DrawerThread *thread) + { + WorkerThreadData d; + d.core = thread->core; + d.num_cores = thread->num_cores; + d.pass_start_y = thread->pass_start_y; + d.pass_end_y = thread->pass_end_y; + return d; + } + +public: + DrawWall1LLVMCommand() + { + args.dest = (uint32_t*)dc_dest; + args.dest_y = _dest_y; + args.pitch = dc_pitch; + args.count = dc_count; + args.texturefrac[0] = dc_texturefrac; + args.texturefracx[0] = dc_texturefracx; + args.iscale[0] = dc_iscale; + args.textureheight[0] = dc_textureheight; + args.source[0] = (const uint32 *)dc_source; + args.source2[0] = (const uint32 *)dc_source2; + args.light[0] = LightBgra::calc_light_multiplier(dc_light); + args.light_red = dc_shade_constants.light_red; + args.light_green = dc_shade_constants.light_green; + args.light_blue = dc_shade_constants.light_blue; + args.light_alpha = dc_shade_constants.light_alpha; + args.fade_red = dc_shade_constants.fade_red; + args.fade_green = dc_shade_constants.fade_green; + args.fade_blue = dc_shade_constants.fade_blue; + args.fade_alpha = dc_shade_constants.fade_alpha; + args.desaturate = dc_shade_constants.desaturate; + args.srcalpha = dc_srcalpha >> (FRACBITS - 8); + args.destalpha = dc_destalpha >> (FRACBITS - 8); + args.flags = 0; + if (dc_shade_constants.simple_shade) + args.flags |= DrawWallArgs::simple_shade; + if (args.source2[0] == nullptr) + args.flags |= DrawWallArgs::nearest_filter; + } + + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->vlinec1(&args, &d); + } +}; + +class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->mvlinec1(&args, &d); + } +}; + +class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_add(&args, &d); + } +}; + +class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d); + } +}; + +class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); + } +}; + +class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand +{ +public: + void Execute(DrawerThread *thread) override + { + WorkerThreadData d = ThreadData(thread); + LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); + } +}; + +///////////////////////////////////////////////////////////////////////////// + class DrawerColumnCommand : public DrawerCommand { public: @@ -2901,7 +3114,11 @@ void R_DrawSlab_rgba(int dx, fixed_t v, int dy, fixed_t vi, const BYTE *vptr, BY DWORD vlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } @@ -2920,72 +3137,116 @@ void queue_wallcommand() void vlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } DWORD mvlinec1_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void mvlinec4_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_add_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_addclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_subclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; } fixed_t tmvline1_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else DrawerCommandQueue::QueueCommand(); +#endif return dc_texturefrac + dc_count * dc_iscale; } void tmvline4_revsubclamp_rgba() { +#if !defined(NO_LLVM) + DrawerCommandQueue::QueueCommand(); +#else queue_wallcommand(); +#endif for (int i = 0; i < 4; i++) vplce[i] += vince[i] * dc_count; }