From f9a7186550bf6b6e72f9770ba79e3789b7acf541 Mon Sep 17 00:00:00 2001
From: Magnus Norddahl
Date: Tue, 27 Sep 2016 22:54:37 +0200
Subject: [PATCH] Improve DrawSpan codegen enough to do the simple shade for 64x64 flats

---
Review notes (everything between the "---" line above and the first
"diff --git" line is commentary and does not become part of the commit):

 * DrawSpan changes from (int count, uint32_t *dest) to a single
   `const RenderArgs *`. The generated code reads the fields by index
   (args[0][0] .. args[0][14]), so the element order pushed in
   FixedFunction::GetRenderArgsStruct() must stay identical to the member
   order of `struct RenderArgs` in fixedfunction.h.
 * The generated function walks the span in two loops: `count / 4`
   iterations that each sample four texels and store 16 bytes with
   store_unaligned_vec16ub, followed by a one-pixel-per-iteration loop
   that starts at index `sseLength * 4` and handles the remainder.
 * Both loops use the hard-coded 64x64 lookup (`spot64`). xbits/ybits and
   the yshift/xshift/xmask values derived from them, as well as srcalpha
   and destalpha, are loaded but not consumed yet; the generic `spot`
   expression is still commented out.
 * NOTE(review): `load_vec4ub() * light / 256` looks like it scales every
   lane of the loaded pixel, alpha included - confirm that is intended.
 * NOTE(review): `SSAStack` (added line) and `std::unique_ptr
   mFunctionPassManager` (context line) carry no template arguments in
   this text, and the From: line has no e-mail address - confirm both
   against the tree before applying.

 .../fixedfunction/fixedfunction.cpp           | 112 +++++++++++++++---
 src/r_compiler/fixedfunction/fixedfunction.h  |  26 +++-
 src/r_draw_rgba.cpp                           |  20 +++-
 3 files changed, 139 insertions(+), 19 deletions(-)

diff --git a/src/r_compiler/fixedfunction/fixedfunction.cpp b/src/r_compiler/fixedfunction/fixedfunction.cpp
index cc46b8d50..8f8b09f23 100644
--- a/src/r_compiler/fixedfunction/fixedfunction.cpp
+++ b/src/r_compiler/fixedfunction/fixedfunction.cpp
@@ -38,8 +38,8 @@ RenderProgram::RenderProgram()
 		cpuFeaturesStr += it.getKey();
 	}
 
-	Printf("LLVM target triple: %s\n", targetTriple.c_str());
-	Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str());
+	//Printf("LLVM target triple: %s\n", targetTriple.c_str());
+	//Printf("LLVM CPU and features: %s, %s\n", cpuName.c_str(), cpuFeaturesStr.c_str());
 
 	const Target *target = TargetRegistry::lookupTarget(targetTriple, errorstring);
 	if (!target)
@@ -98,7 +98,7 @@ FixedFunction::FixedFunction()
 	mProgram.engine()->finalizeObject();
 	mProgram.modulePassManager()->run(*mProgram.module());
 
-	DrawSpan = mProgram.GetProcAddress<void(int, uint32_t *)>("DrawSpan");
+	DrawSpan = mProgram.GetProcAddress<void(const RenderArgs *)>("DrawSpan");
 }
 
 void FixedFunction::CodegenDrawSpan()
@@ -107,29 +107,90 @@ void FixedFunction::CodegenDrawSpan()
 	SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
 
 	SSAFunction function("DrawSpan");
-	function.add_parameter(SSAInt::llvm_type());
-	function.add_parameter(SSAUBytePtr::llvm_type());
+	function.add_parameter(GetRenderArgsStruct(mProgram.context()));
 	function.create_public();
 
-	SSAInt count = function.parameter(0);
-	SSAUBytePtr data = function.parameter(1);
-	SSAStack stack_index;
+	SSAStack stack_index, stack_xfrac, stack_yfrac;
+	SSAValue args = function.parameter(0);
+	SSAUBytePtr destorg = args[0][0].load();
+	SSAUBytePtr source = args[0][1].load();
+	SSAInt destpitch = args[0][2].load();
+	stack_xfrac.store(args[0][3].load());
+	stack_yfrac.store(args[0][4].load());
+	SSAInt xstep = args[0][5].load();
+	SSAInt ystep = args[0][6].load();
+	SSAInt x1 = args[0][7].load();
+	SSAInt x2 = args[0][8].load();
+	SSAInt y = args[0][9].load();
+	SSAInt xbits = args[0][10].load();
+	SSAInt ybits = args[0][11].load();
+	SSAInt light = args[0][12].load();
+	SSAInt srcalpha = args[0][13].load();
+	SSAInt destalpha = args[0][14].load();
+
+	SSAInt count = x2 - x1 + 1;
+	SSAUBytePtr data = destorg[(x1 + y * destpitch) * 4];
+
+	SSAInt yshift = 32 - ybits;
+	SSAInt xshift = yshift - xbits;
+	SSAInt xmask = ((SSAInt(1) << xbits) - 1) << ybits;
+	//is_64x64 = xbits == 6 && ybits == 6;
+
+	SSAInt sseLength = count / 4;
 
 	stack_index.store(0);
-	SSAForBlock loop;
 	{
+		SSAForBlock loop;
+		SSAInt index = stack_index.load();
+		loop.loop_block(index < sseLength);
+
+		SSAVec4i colors[4];
+		for (int i = 0; i < 4; i++)
+		{
+			SSAInt xfrac = stack_xfrac.load();
+			SSAInt yfrac = stack_yfrac.load();
+
+			// 64x64 is the most common case by far, so special case it.
+			SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+			//SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+			//*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants);
+			colors[i] = source[spot64 * 4].load_vec4ub() * light / 256;
+
+			stack_xfrac.store(xfrac + xstep);
+			stack_yfrac.store(yfrac + ystep);
+		}
+
+		SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
+		data[index * 16].store_unaligned_vec16ub(ssecolors);
+
+		stack_index.store(index + 1);
+		loop.end_block();
+	}
+
+	stack_index.store(sseLength * 4);
+	{
+		SSAForBlock loop;
 		SSAInt index = stack_index.load();
 		loop.loop_block(index < count);
 
-		SSAVec4i color(0, 128, 255, 255);
+		SSAInt xfrac = stack_xfrac.load();
+		SSAInt yfrac = stack_yfrac.load();
+
+		// 64x64 is the most common case by far, so special case it.
+		SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
+		//SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
+
+		//*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants);
+		SSAVec4i color = source[spot64 * 4].load_vec4ub();
+		color = color * light / 256;
 		data[index * 4].store_vec4ub(color);
-		/*data[index * 4].store(0);
-		data[index * 4 + 1].store(128);
-		data[index * 4 + 2].store(255);
-		data[index * 4 + 3].store(255);*/
+
 		stack_index.store(index + 1);
+		stack_xfrac.store(xfrac + xstep);
+		stack_yfrac.store(yfrac + ystep);
+		loop.end_block();
 	}
-	loop.end_block();
 
 	builder.CreateRetVoid();
 
@@ -139,6 +200,27 @@ void FixedFunction::CodegenDrawSpan()
 	mProgram.functionPassManager()->run(*function.func);
 }
 
+llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context)
+{
+	std::vector<llvm::Type *> elements;
+	elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg;
+	elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t destpitch;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xfrac;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t yfrac;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xstep;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ystep;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x1;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t x2;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t y;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t xbits;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t ybits;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha;
+	elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha;
+	return llvm::StructType::get(context, elements, false)->getPointerTo();
+}
+
 #if 0
 
 GlslFixedFunction::GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen)
diff --git a/src/r_compiler/fixedfunction/fixedfunction.h b/src/r_compiler/fixedfunction/fixedfunction.h
index 7ee68032e..3bbf05abe 100644
--- a/src/r_compiler/fixedfunction/fixedfunction.h
+++ b/src/r_compiler/fixedfunction/fixedfunction.h
@@ -6,6 +6,7 @@
 #include "r_compiler/ssa/ssa_vec8s.h"
 #include "r_compiler/ssa/ssa_vec16ub.h"
 #include "r_compiler/ssa/ssa_int.h"
+#include "r_compiler/ssa/ssa_short.h"
 #include "r_compiler/ssa/ssa_ubyte_ptr.h"
 #include "r_compiler/ssa/ssa_vec4f_ptr.h"
 #include "r_compiler/ssa/ssa_vec4i_ptr.h"
@@ -39,16 +40,39 @@ private:
 	std::unique_ptr mFunctionPassManager;
 };
 
+struct RenderArgs
+{
+	uint32_t *destorg;
+	const uint32_t *source;
+	int32_t destpitch;
+	int32_t xfrac;
+	int32_t yfrac;
+	int32_t xstep;
+	int32_t ystep;
+	int32_t x1;
+	int32_t x2;
+	int32_t y;
+	int32_t xbits;
+	int32_t ybits;
+	uint32_t light;
+	uint32_t srcalpha;
+	uint32_t destalpha;
+	//ShadeConstants _shade_constants;
+	//int32_t nearest_filter;
+};
+
 class FixedFunction
 {
 public:
 	FixedFunction();
 
-	void(*DrawSpan)(int, uint32_t *) = nullptr;
+	void(*DrawSpan)(const RenderArgs *) = nullptr;
 
 private:
 	void CodegenDrawSpan();
 
+	static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
+
 	RenderProgram mProgram;
 };
 
diff --git a/src/r_draw_rgba.cpp b/src/r_draw_rgba.cpp
index d54bad7ae..975739095 100644
--- a/src/r_draw_rgba.cpp
+++ b/src/r_draw_rgba.cpp
@@ -354,9 +354,23 @@ public:
 		if (thread->skipped_by_thread(_y))
 			return;
 
-		uint32_t *dest = ylookup[_y] + _x1 + (uint32_t*)_destorg;
-		int count = _x2 - _x1 + 1;
-		_ff->DrawSpan(count, dest);
+		RenderArgs args;
+		args.destorg = (uint32_t *)_destorg;
+		args.source = _source;
+		args.destpitch = dc_pitch;
+		args.xfrac = _xfrac;
+		args.yfrac = _yfrac;
+		args.xstep = _xstep;
+		args.ystep = _ystep;
+		args.x1 = _x1;
+		args.x2 = _x2;
+		args.y = _y;
+		args.xbits = _xbits;
+		args.ybits = _ybits;
+		args.light = _light;
+		args.srcalpha = _srcalpha;
+		args.destalpha = _destalpha;
+		_ff->DrawSpan(&args);
 	}
 };
 