From afab45674ba32901d789a631d858757862650d3d Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Thu, 29 Sep 2016 07:38:33 +0200
Subject: [PATCH] Added half of wall codegen

---
Review notes (ignored by git am; not part of the commit message):

 * What the patch does: adds DrawWallCodegen (loading of the argument struct,
   shade/filter specialisation, single-column pixel loop), the DrawWallArgs
   struct with its LLVM type description, and registers twelve generated
   drawers (vlinec*, mvlinec*, tmvline*_*). The four-column loop body is still
   empty and Sample() returns a zero vector.
 * Changed in review: the fatal error text in CodegenDrawWall no longer
   concatenates a string literal with __FUNCTION__ (only accepted where
   __FUNCTION__ is a literal); std::vector has its element type spelled out;
   one-line purpose comments were added to the new functions. Hunk headers and
   the diffstat were recomputed for the added comment lines.
 * The "index" lines keep the original blob ids, so the post-image ids do not
   correspond to the edited content.
 * NOTE(review): SSAStack and GetProcAddress are reproduced exactly as they
   appear in the source text; they may have lost explicit template arguments
   in the same way std::vector had. Confirm against the repository.
 * NOTE(review): tab indentation and blank context lines were reconstructed
   from the hunk line counts. Confirm before applying.

 .../fixedfunction/drawwallcodegen.cpp         | 161 +++++++++++++++++-
 .../fixedfunction/drawwallcodegen.h           |  49 ++++--
 src/r_compiler/llvmdrawers.cpp                |  72 +++++++-
 src/r_compiler/llvmdrawers.h                  |  47 ++++++
 4 files changed, 314 insertions(+), 15 deletions(-)

diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.cpp b/src/r_compiler/fixedfunction/drawwallcodegen.cpp
index 0e94c11ed..65b2224b5 100644
--- a/src/r_compiler/fixedfunction/drawwallcodegen.cpp
+++ b/src/r_compiler/fixedfunction/drawwallcodegen.cpp
@@ -10,6 +10,165 @@
 #include "r_compiler/ssa/ssa_struct_type.h"
 #include "r_compiler/ssa/ssa_value.h"
 
-void DrawWallCodegen::Generate(DrawWallVariant variant, SSAValue args)
+// Loads every DrawWallArgs field by struct index, prepares the per-column texture stepping state and
+// emits the drawing loop once per simple_shade/nearest_filter combination, selected by the runtime flags.
+void DrawWallCodegen::Generate(DrawWallVariant variant, bool fourColumns, SSAValue args)
 {
+	dest = args[0][0].load();
+	source[0] = args[0][1].load();
+	source[1] = args[0][2].load();
+	source[2] = args[0][3].load();
+	source[3] = args[0][4].load();
+	source2[0] = args[0][5].load();
+	source2[1] = args[0][6].load();
+	source2[2] = args[0][7].load();
+	source2[3] = args[0][8].load();
+	pitch = args[0][9].load();
+	count = args[0][10].load();
+	dest_y = args[0][11].load();
+	texturefrac[0] = args[0][12].load();
+	texturefrac[1] = args[0][13].load();
+	texturefrac[2] = args[0][14].load();
+	texturefrac[3] = args[0][15].load();
+	texturefracx[0] = args[0][16].load();
+	texturefracx[1] = args[0][17].load();
+	texturefracx[2] = args[0][18].load();
+	texturefracx[3] = args[0][19].load();
+	iscale[0] = args[0][20].load();
+	iscale[1] = args[0][21].load();
+	iscale[2] = args[0][22].load();
+	iscale[3] = args[0][23].load();
+	textureheight[0] = args[0][24].load();
+	textureheight[1] = args[0][25].load();
+	textureheight[2] = args[0][26].load();
+	textureheight[3] = args[0][27].load();
+	light[0] = args[0][28].load();
+	light[1] = args[0][29].load();
+	light[2] = args[0][30].load();
+	light[3] = args[0][31].load();
+	srcalpha = args[0][32].load();
+	destalpha = args[0][33].load();
+	SSAShort light_alpha = args[0][34].load();
+	SSAShort light_red = args[0][35].load();
+	SSAShort light_green = args[0][36].load();
+	SSAShort light_blue = args[0][37].load();
+	SSAShort fade_alpha = args[0][38].load();
+	SSAShort fade_red = args[0][39].load();
+	SSAShort fade_green = args[0][40].load();
+	SSAShort fade_blue = args[0][41].load();
+	SSAShort desaturate = args[0][42].load();
+	SSAInt flags = args[0][43].load();
+	shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
+	shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
+	shade_constants.desaturate = desaturate.zext_int();
+
+	is_simple_shade = (flags & DrawWallArgs::simple_shade) == DrawWallArgs::simple_shade;
+	is_nearest_filter = (flags & DrawWallArgs::nearest_filter) == DrawWallArgs::nearest_filter;
+
+	/*
+		count = thread->count_for_thread(command->_dest_y, command->_count);
+		fracstep = command->_iscale * thread->num_cores;
+		frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y);
+		texturefracx = command->_texturefracx;
+		dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest);
+		pitch = command->_pitch * thread->num_cores;
+		height = command->_textureheight;
+		one = ((0x80000000 + height - 1) / height) * 2 + 1;
+	*/
+	int numColumns = fourColumns ? 4 : 1;
+	for (int i = 0; i < numColumns; i++)
+	{
+		stack_frac[i].store(texturefrac[i] + iscale[i]);// * skipped_by_thread(dest_y);
+		fracstep[i] = iscale[i];// * num_cores;
+		one[i] = ((0x80000000 + textureheight[i] - 1) / textureheight[i]) * 2 + 1;
+	}
+
+	SSAIfBlock branch;
+	branch.if_block(is_simple_shade);
+	LoopShade(variant, fourColumns, true);
+	branch.else_block();
+	LoopShade(variant, fourColumns, false);
+	branch.end_block();
+}
+
+// Emits the loop for both outcomes of the nearest_filter flag, with the shade mode fixed at codegen time.
+void DrawWallCodegen::LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade)
+{
+	SSAIfBlock branch;
+	branch.if_block(is_nearest_filter);
+	Loop(variant, fourColumns, isSimpleShade, true);
+	branch.else_block();
+	Loop(variant, fourColumns, isSimpleShade, false);
+	branch.end_block();
+}
+
+// Emits the loop running index from 0 to count; only the single-column path writes pixels, the four-column branch is still empty.
+void DrawWallCodegen::Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter)
+{
+	int numColumns = fourColumns ? 4 : 1;
+
+	stack_index.store(0);
+	{
+		SSAForBlock loop;
+		SSAInt index = stack_index.load();
+		loop.loop_block(index < count);
+
+		SSAInt frac[4];
+		for (int i = 0; i < numColumns; i++)
+			frac[i] = stack_frac[i].load();
+
+		SSAInt offset = (dest_y + index) * pitch * 4;
+
+		if (fourColumns)
+		{
+
+		}
+		else
+		{
+			SSAVec4i bgcolor = dest[offset].load_vec4ub();
+			SSAVec4i color = Blend(Shade(Sample(frac[0], isNearestFilter), 0, isSimpleShade), bgcolor, variant);
+			dest[offset].store_vec4ub(color);
+		}
+
+		stack_index.store(index + 1);
+		for (int i = 0; i < numColumns; i++)
+			stack_frac[i].store(frac[i] + fracstep[i]);
+		loop.end_block();
+	}
+}
+
+// Placeholder: always returns a zero vector; the intended index math is kept in the comment below.
+SSAVec4i DrawWallCodegen::Sample(SSAInt frac, bool isNearestFilter)
+{
+	// int sample_index() { return ((frac >> FRACBITS) * height) >> FRACBITS; }
+	return SSAVec4i(0);
+}
+
+// Shades fg with light[index], using the simple or the advanced shading helper.
+SSAVec4i DrawWallCodegen::Shade(SSAVec4i fg, int index, bool isSimpleShade)
+{
+	if (isSimpleShade)
+		return shade_bgra_simple(fg, light[index]);
+	else
+		return shade_bgra_advanced(fg, light[index], shade_constants);
+}
+
+// Blends fg with bg for the given variant; unlisted variants fall through to the opaque copy.
+SSAVec4i DrawWallCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant)
+{
+	switch (variant)
+	{
+	default:
+	case DrawWallVariant::Opaque:
+		return blend_copy(fg);
+	case DrawWallVariant::Masked:
+		return blend_alpha_blend(fg, bg);
+	case DrawWallVariant::Add:
+	case DrawWallVariant::AddClamp:
+		return blend_add(fg, bg, srcalpha, destalpha);
+	case DrawWallVariant::SubClamp:
+		return blend_sub(fg, bg, srcalpha, destalpha);
+	case DrawWallVariant::RevSubClamp:
+		return blend_revsub(fg, bg, srcalpha, destalpha);
+	}
 }
diff --git a/src/r_compiler/fixedfunction/drawwallcodegen.h b/src/r_compiler/fixedfunction/drawwallcodegen.h
index f514ca8ca..eafc8cf69 100644
--- a/src/r_compiler/fixedfunction/drawwallcodegen.h
+++ b/src/r_compiler/fixedfunction/drawwallcodegen.h
@@ -5,22 +5,45 @@
 
 enum class DrawWallVariant
 {
-	Opaque1, // vlinec1
-	Opaque4, // vlinec4
-	Masked1, // mvlinec1
-	Masked4, // mvlinec4
-	Add1, // tmvline1_add
-	Add4, // tmvline4_add
-	AddClamp1, // tmvline1_addclamp
-	AddClamp4, // tmvline4_addclamp
-	SubClamp1, // tmvline1_subclamp
-	SubClamp4, // tmvline4_subclamp
-	RevSubClamp1, // tmvline1_revsubclamp
-	RevSubClamp4, // tmvline4_revsubclamp
+	Opaque,
+	Masked,
+	Add,
+	AddClamp,
+	SubClamp,
+	RevSubClamp
 };
 
 class DrawWallCodegen : public DrawerCodegen
 {
 public:
-	void Generate(DrawWallVariant variant, SSAValue args);
+	void Generate(DrawWallVariant variant, bool fourColumns, SSAValue args);
+
+private:
+	void LoopShade(DrawWallVariant variant, bool fourColumns, bool isSimpleShade);
+	void Loop(DrawWallVariant variant, bool fourColumns, bool isSimpleShade, bool isNearestFilter);
+	SSAVec4i Sample(SSAInt frac, bool isNearestFilter);
+	SSAVec4i Shade(SSAVec4i fg, int index, bool isSimpleShade);
+	SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawWallVariant variant);
+
+	SSAStack stack_index, stack_frac[4];
+
+	SSAUBytePtr dest;
+	SSAUBytePtr source[4];
+	SSAUBytePtr source2[4];
+	SSAInt pitch;
+	SSAInt count;
+	SSAInt dest_y;
+	SSAInt texturefrac[4];
+	SSAInt texturefracx[4];
+	SSAInt iscale[4];
+	SSAInt textureheight[4];
+	SSAInt light[4];
+	SSAInt srcalpha;
+	SSAInt destalpha;
+	SSABool is_simple_shade;
+	SSABool is_nearest_filter;
+	SSAShadeConstants shade_constants;
+
+	SSAInt fracstep[4];
+	SSAInt one[4];
 };
diff --git a/src/r_compiler/llvmdrawers.cpp b/src/r_compiler/llvmdrawers.cpp
index 320bfb653..57c3293bb 100644
--- a/src/r_compiler/llvmdrawers.cpp
+++ b/src/r_compiler/llvmdrawers.cpp
@@ -47,7 +47,10 @@ public:
 
 private:
 	void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
+	void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns);
+
 	static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context);
+	static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context);
 
 	LLVMProgram mProgram;
 };
@@ -83,6 +86,18 @@ LLVMDrawersImpl::LLVMDrawersImpl()
 	CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent);
 	CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp);
 	CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp);
+	CodegenDrawWall("vlinec1", DrawWallVariant::Opaque, 1);
+	CodegenDrawWall("vlinec4", DrawWallVariant::Opaque, 4);
+	CodegenDrawWall("mvlinec1", DrawWallVariant::Masked, 1);
+	CodegenDrawWall("mvlinec4", DrawWallVariant::Masked, 4);
+	CodegenDrawWall("tmvline1_add", DrawWallVariant::Add, 1);
+	CodegenDrawWall("tmvline4_add", DrawWallVariant::Add, 4);
+	CodegenDrawWall("tmvline1_addclamp", DrawWallVariant::AddClamp, 1);
+	CodegenDrawWall("tmvline4_addclamp", DrawWallVariant::AddClamp, 4);
+	CodegenDrawWall("tmvline1_subclamp", DrawWallVariant::SubClamp, 1);
+	CodegenDrawWall("tmvline4_subclamp", DrawWallVariant::SubClamp, 4);
+	CodegenDrawWall("tmvline1_revsubclamp", DrawWallVariant::RevSubClamp, 1);
+	CodegenDrawWall("tmvline4_revsubclamp", DrawWallVariant::RevSubClamp, 4);
 
 	mProgram.engine()->finalizeObject();
 	mProgram.modulePassManager()->run(*mProgram.module());
@@ -93,6 +108,18 @@ LLVMDrawersImpl::LLVMDrawersImpl()
 	DrawSpanMaskedTranslucent = mProgram.GetProcAddress("DrawSpanMaskedTranslucent");
 	DrawSpanAddClamp = mProgram.GetProcAddress("DrawSpanAddClamp");
 	DrawSpanMaskedAddClamp = mProgram.GetProcAddress("DrawSpanMaskedAddClamp");
+	vlinec1 = mProgram.GetProcAddress("vlinec1");
+	vlinec4 = mProgram.GetProcAddress("vlinec4");
+	mvlinec1 = mProgram.GetProcAddress("mvlinec1");
+	mvlinec4 = mProgram.GetProcAddress("mvlinec4");
+	tmvline1_add = mProgram.GetProcAddress("tmvline1_add");
+	tmvline4_add = mProgram.GetProcAddress("tmvline4_add");
+	tmvline1_addclamp = mProgram.GetProcAddress("tmvline1_addclamp");
+	tmvline4_addclamp = mProgram.GetProcAddress("tmvline4_addclamp");
+	tmvline1_subclamp = mProgram.GetProcAddress("tmvline1_subclamp");
+	tmvline4_subclamp = mProgram.GetProcAddress("tmvline4_subclamp");
+	tmvline1_revsubclamp = mProgram.GetProcAddress("tmvline1_revsubclamp");
+	tmvline4_revsubclamp = mProgram.GetProcAddress("tmvline4_revsubclamp");
 
 	mProgram.StopLogFatalErrors();
 }
@@ -117,11 +144,32 @@ void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
 	mProgram.functionPassManager()->run(*function.func);
 }
 
+// Creates the public LLVM function `name` taking one DrawWallArgs struct pointer; columns == 4 requests the four-column body.
+void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant, int columns)
+{
+	llvm::IRBuilder<> builder(mProgram.context());
+	SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
+
+	SSAFunction function(name);
+	function.add_parameter(GetDrawWallArgsStruct(mProgram.context()));
+	function.create_public();
+
+	DrawWallCodegen codegen;
+	codegen.Generate(variant, columns == 4, function.parameter(0));
+
+	builder.CreateRetVoid();
+
+	if (llvm::verifyFunction(*function.func))
+		I_FatalError("verifyFunction failed for CodegenDrawWall"); // plain literal: __FUNCTION__ cannot be concatenated portably
+
+	mProgram.functionPassManager()->run(*function.func);
+}
+
 llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
 {
 	std::vector<llvm::Type *> elements;
 	elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg;
-	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source;
+	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *source;
 	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch;
 	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac;
 	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac;
@@ -148,6 +196,28 @@ llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
 	return llvm::StructType::get(context, elements, false)->getPointerTo();
 }
 
+// LLVM type for a pointer to DrawWallArgs; element order has to match the indices DrawWallCodegen::Generate loads.
+llvm::Type *LLVMDrawersImpl::GetDrawWallArgsStruct(llvm::LLVMContext &context)
+{
+	std::vector<llvm::Type *> elements;
+	elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest;
+	for (int i = 0; i < 8; i++) // source[4], source2[4]
+		elements.push_back(llvm::Type::getInt8PtrTy(context));
+	for (int i = 0; i < 25; i++) // pitch through destalpha (25 x 32-bit)
+		elements.push_back(llvm::Type::getInt32Ty(context));
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue;
+	elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags;
+	return llvm::StructType::get(context, elements, false)->getPointerTo();
+}
+
 /////////////////////////////////////////////////////////////////////////////
 
 namespace { static bool LogFatalErrors = false; }
diff --git a/src/r_compiler/llvmdrawers.h b/src/r_compiler/llvmdrawers.h
index 53e64032f..92f7e9440 100644
--- a/src/r_compiler/llvmdrawers.h
+++ b/src/r_compiler/llvmdrawers.h
@@ -1,6 +1,40 @@
 
 #pragma once
 
+// Arguments for the generated wall drawers; GetDrawWallArgsStruct() describes the same field order to LLVM.
+struct DrawWallArgs
+{
+	uint32_t *dest;
+	const uint32_t *source[4];
+	const uint32_t *source2[4];
+	int32_t pitch;
+	int32_t count;
+	int32_t dest_y;
+	uint32_t texturefrac[4];
+	uint32_t texturefracx[4];
+	uint32_t iscale[4];
+	uint32_t textureheight[4];
+	uint32_t light[4];
+	uint32_t srcalpha;
+	uint32_t destalpha;
+
+	uint16_t light_alpha;
+	uint16_t light_red;
+	uint16_t light_green;
+	uint16_t light_blue;
+	uint16_t fade_alpha;
+	uint16_t fade_red;
+	uint16_t fade_green;
+	uint16_t fade_blue;
+	uint16_t desaturate;
+	uint32_t flags;
+	enum Flags
+	{
+		simple_shade = 1,
+		nearest_filter = 2
+	};
+};
+
 struct DrawSpanArgs
 {
 	uint32_t *destorg;
@@ -52,6 +86,19 @@ public:
 	void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr;
 	void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr;
 
+	void(*vlinec1)(const DrawWallArgs *) = nullptr;
+	void(*vlinec4)(const DrawWallArgs *) = nullptr;
+	void(*mvlinec1)(const DrawWallArgs *) = nullptr;
+	void(*mvlinec4)(const DrawWallArgs *) = nullptr;
+	void(*tmvline1_add)(const DrawWallArgs *) = nullptr;
+	void(*tmvline4_add)(const DrawWallArgs *) = nullptr;
+	void(*tmvline1_addclamp)(const DrawWallArgs *) = nullptr;
+	void(*tmvline4_addclamp)(const DrawWallArgs *) = nullptr;
+	void(*tmvline1_subclamp)(const DrawWallArgs *) = nullptr;
+	void(*tmvline4_subclamp)(const DrawWallArgs *) = nullptr;
+	void(*tmvline1_revsubclamp)(const DrawWallArgs *) = nullptr;
+	void(*tmvline4_revsubclamp)(const DrawWallArgs *) = nullptr;
+
 private:
 	static LLVMDrawers *Singleton;
 };