Codegen all DrawSpan variants

This commit is contained in:
Magnus Norddahl 2016-09-29 04:01:42 +02:00
parent efd22346d8
commit e5f3c119cd
5 changed files with 193 additions and 51 deletions

View file

@ -11,7 +11,7 @@
#include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_value.h"
#include "r_compiler/ssa/ssa_barycentric_weight.h" #include "r_compiler/ssa/ssa_barycentric_weight.h"
void DrawSpanCodegen::Generate(SSAValue args) void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args)
{ {
destorg = args[0][0].load(); destorg = args[0][0].load();
source = args[0][1].load(); source = args[0][1].load();
@ -51,44 +51,44 @@ void DrawSpanCodegen::Generate(SSAValue args)
// 64x64 is the most common case by far, so special case it. // 64x64 is the most common case by far, so special case it.
is_64x64 = xbits == 6 && ybits == 6; is_64x64 = xbits == 6 && ybits == 6;
is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade; is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade;
is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter; is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter;
SSAIfBlock branch; SSAIfBlock branch;
branch.if_block(is_simple_shade); branch.if_block(is_simple_shade);
LoopShade(true); LoopShade(variant, true);
branch.else_block(); branch.else_block();
LoopShade(false); LoopShade(variant, false);
branch.end_block(); branch.end_block();
} }
void DrawSpanCodegen::LoopShade(bool isSimpleShade) void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade)
{ {
SSAIfBlock branch; SSAIfBlock branch;
branch.if_block(is_nearest_filter); branch.if_block(is_nearest_filter);
LoopFilter(isSimpleShade, true); LoopFilter(variant, isSimpleShade, true);
branch.else_block(); branch.else_block();
LoopFilter(isSimpleShade, false); LoopFilter(variant, isSimpleShade, false);
branch.end_block(); branch.end_block();
} }
void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter) void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter)
{ {
SSAIfBlock branch; SSAIfBlock branch;
branch.if_block(is_64x64); branch.if_block(is_64x64);
{ {
SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true); SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true);
Loop(sseLength * 4, isSimpleShade, isNearestFilter, true); Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true);
} }
branch.else_block(); branch.else_block();
{ {
SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false); SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false);
Loop(sseLength * 4, isSimpleShade, isNearestFilter, false); Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false);
} }
branch.end_block(); branch.end_block();
} }
SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64) SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{ {
SSAInt sseLength = count / 4; SSAInt sseLength = count / 4;
stack_index.store(0); stack_index.store(0);
@ -97,24 +97,31 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is
SSAInt index = stack_index.load(); SSAInt index = stack_index.load();
loop.loop_block(index < sseLength); loop.loop_block(index < sseLength);
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub();
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
{
SSAVec4i::extendlo(bg0),
SSAVec4i::extendhi(bg0),
SSAVec4i::extendlo(bg1),
SSAVec4i::extendhi(bg1)
};
SSAVec4i colors[4]; SSAVec4i colors[4];
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
{ {
SSAInt xfrac = stack_xfrac.load(); SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load(); SSAInt yfrac = stack_yfrac.load();
SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant);
if (isSimpleShade)
colors[i] = shade_bgra_simple(fg, light);
else
colors[i] = shade_bgra_advanced(fg, light, shade_constants);
stack_xfrac.store(xfrac + xstep); stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep); stack_yfrac.store(yfrac + ystep);
} }
SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3])); SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
data[index * 16].store_unaligned_vec16ub(ssecolors); data[index * 16].store_unaligned_vec16ub(color);
stack_index.store(index + 1); stack_index.store(index + 1);
loop.end_block(); loop.end_block();
@ -122,7 +129,7 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is
return sseLength; return sseLength;
} }
void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64) void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{ {
stack_index.store(start); stack_index.store(start);
{ {
@ -133,13 +140,8 @@ void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilte
SSAInt xfrac = stack_xfrac.load(); SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load(); SSAInt yfrac = stack_yfrac.load();
SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64); SSAVec4i bgcolor = data[index * 4].load_vec4ub();
SSAVec4i color; SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant);
if (isSimpleShade)
color = shade_bgra_simple(fg, light);
else
color = shade_bgra_advanced(fg, light, shade_constants);
data[index * 4].store_vec4ub(color); data[index * 4].store_vec4ub(color);
stack_index.store(index + 1); stack_index.store(index + 1);
@ -173,6 +175,32 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
} }
} }
// Applies lighting to a sampled texel. isSimpleShade is a plain C++ bool, so
// exactly one of the two shading routines is invoked per call: the simple one
// takes only the light value, the advanced one also takes shade_constants.
SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
	if (!isSimpleShade)
		return shade_bgra_advanced(fg, light, shade_constants);

	return shade_bgra_simple(fg, light);
}
// Combines the shaded foreground colour with the background colour according
// to the requested drawer variant.
//
//   Masked                             -> blend_alpha_blend(fg, bg)
//   Translucent, AddClamp              -> blend_add with srcalpha / destalpha
//   MaskedTranslucent, MaskedAddClamp  -> blend_add with the background alpha
//                                         replaced by calc_blend_bgalpha(fg, destalpha)
//   Opaque and any other value         -> blend_copy(fg)
SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant)
{
	const bool isAdditive =
		variant == DrawSpanVariant::Translucent ||
		variant == DrawSpanVariant::AddClamp;
	const bool isMaskedAdditive =
		variant == DrawSpanVariant::MaskedTranslucent ||
		variant == DrawSpanVariant::MaskedAddClamp;

	if (variant == DrawSpanVariant::Masked)
		return blend_alpha_blend(fg, bg);

	if (isAdditive)
		return blend_add(fg, bg, srcalpha, destalpha);

	if (isMaskedAdditive)
		return blend_add(fg, bg, srcalpha, calc_blend_bgalpha(fg, destalpha));

	// Opaque, plus the fallback for any unrecognised variant value.
	return blend_copy(fg);
}
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light) SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light)
@ -249,6 +277,14 @@ SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg)
return color.insert(3, 255); return color.insert(3, 255);
} }
// Computes the background alpha handed to the blend step when the foreground
// texel's own alpha (component 3 of fg) has to be taken into account.
// The result is a rounded 8.8 fixed-point mix of destalpha and 256, weighted
// by the foreground alpha.
SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha)
{
	SSAInt texel_alpha = fg[3];
	// Adds 1 once bit 7 is set, so an input of 255 becomes 256
	// (assumes the component is an 8-bit value - TODO confirm).
	SSAInt weight = texel_alpha + (texel_alpha >> 7);
	SSAInt inv_weight = 256 - weight;
	// +128 rounds to nearest before the shift drops the fractional bits.
	return (destalpha * weight + 256 * inv_weight + 128) >> 8;
}
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height) SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{ {
SSAInt frac_y0 = (texturefracy >> FRACBITS) * height; SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;

View file

@ -43,22 +43,37 @@ public:
SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha); SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg); SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg);
// Calculates the background alpha to pass to the blend functions when the source texture's alpha channel is also applied
SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha);
// SampleBgra // SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
}; };
// Selects which blend step DrawSpanCodegen::Blend applies for a span drawer.
// One drawer function is generated per enumerator.
enum class DrawSpanVariant
{
	Opaque,            // blend_copy(fg); the background is not passed to the blend
	Masked,            // blend_alpha_blend(fg, bg)
	Translucent,       // blend_add(fg, bg, srcalpha, destalpha)
	MaskedTranslucent, // blend_add with the background alpha from calc_blend_bgalpha
	AddClamp,          // same blend call as Translucent
	MaskedAddClamp,    // same blend call as MaskedTranslucent
};
class DrawSpanCodegen : public DrawerCodegen class DrawSpanCodegen : public DrawerCodegen
{ {
public: public:
void Generate(SSAValue args); void Generate(DrawSpanVariant variant, SSAValue args);
private: private:
void LoopShade(bool isSimpleShade); void LoopShade(DrawSpanVariant variant, bool isSimpleShade);
void LoopFilter(bool isSimpleShade, bool isNearestFilter); void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter);
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64); SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64); void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64); SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant);
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac; SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;

View file

@ -44,8 +44,8 @@ public:
LLVMDrawersImpl(); LLVMDrawersImpl();
private: private:
void CodegenDrawSpan(); void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context);
LLVMProgram mProgram; LLVMProgram mProgram;
}; };
@ -75,26 +75,37 @@ LLVMDrawers *LLVMDrawers::Instance()
LLVMDrawersImpl::LLVMDrawersImpl() LLVMDrawersImpl::LLVMDrawersImpl()
{ {
CodegenDrawSpan(); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque);
CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked);
CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent);
CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent);
CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp);
CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp);
mProgram.engine()->finalizeObject(); mProgram.engine()->finalizeObject();
mProgram.modulePassManager()->run(*mProgram.module()); mProgram.modulePassManager()->run(*mProgram.module());
DrawSpan = mProgram.GetProcAddress<void(const RenderArgs *)>("DrawSpan"); DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan");
DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked");
DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent");
DrawSpanMaskedTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedTranslucent");
DrawSpanAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanAddClamp");
DrawSpanMaskedAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedAddClamp");
mProgram.StopLogFatalErrors(); mProgram.StopLogFatalErrors();
} }
void LLVMDrawersImpl::CodegenDrawSpan() void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
{ {
llvm::IRBuilder<> builder(mProgram.context()); llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function("DrawSpan"); SSAFunction function(name);
function.add_parameter(GetRenderArgsStruct(mProgram.context())); function.add_parameter(GetDrawSpanArgsStruct(mProgram.context()));
function.create_public(); function.create_public();
DrawSpanCodegen codegen; DrawSpanCodegen codegen;
codegen.Generate(function.parameter(0)); codegen.Generate(variant, function.parameter(0));
builder.CreateRetVoid(); builder.CreateRetVoid();
@ -104,7 +115,7 @@ void LLVMDrawersImpl::CodegenDrawSpan()
mProgram.functionPassManager()->run(*function.func); mProgram.functionPassManager()->run(*function.func);
} }
llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
{ {
std::vector<llvm::Type *> elements; std::vector<llvm::Type *> elements;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg; elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg;

View file

@ -1,7 +1,7 @@
#pragma once #pragma once
struct RenderArgs struct DrawSpanArgs
{ {
uint32_t *destorg; uint32_t *destorg;
const uint32_t *source; const uint32_t *source;
@ -45,7 +45,12 @@ public:
static void Destroy(); static void Destroy();
static LLVMDrawers *Instance(); static LLVMDrawers *Instance();
void(*DrawSpan)(const RenderArgs *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr;
private: private:
static LLVMDrawers *Singleton; static LLVMDrawers *Singleton;

View file

@ -302,7 +302,8 @@ void DrawerCommandQueue::StopThreads()
class DrawSpanLLVMCommand : public DrawerCommand class DrawSpanLLVMCommand : public DrawerCommand
{ {
RenderArgs args; protected:
DrawSpanArgs args;
public: public:
DrawSpanLLVMCommand() DrawSpanLLVMCommand()
@ -333,9 +334,9 @@ public:
args.destalpha = dc_destalpha >> (FRACBITS - 8); args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.flags = 0; args.flags = 0;
if (ds_shade_constants.simple_shade) if (ds_shade_constants.simple_shade)
args.flags |= RenderArgs::simple_shade; args.flags |= DrawSpanArgs::simple_shade;
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped)) if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
args.flags |= RenderArgs::nearest_filter; args.flags |= DrawSpanArgs::nearest_filter;
} }
void Execute(DrawerThread *thread) override void Execute(DrawerThread *thread) override
@ -346,6 +347,61 @@ public:
} }
}; };
// Drawer command for the masked span variant. It inherits the argument setup
// from DrawSpanLLVMCommand and only changes which LLVMDrawers function
// pointer is invoked.
class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand
{
public:
	void Execute(DrawerThread *thread) override
	{
		// Nothing to do when the thread reports args.y as skipped.
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMasked(&args);
	}
};
// Drawer command for the translucent span variant. It inherits the argument
// setup from DrawSpanLLVMCommand and only changes which LLVMDrawers function
// pointer is invoked.
class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
	void Execute(DrawerThread *thread) override
	{
		// Nothing to do when the thread reports args.y as skipped.
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanTranslucent(&args);
	}
};
// Drawer command for the masked translucent span variant. It inherits the
// argument setup from DrawSpanLLVMCommand and only changes which LLVMDrawers
// function pointer is invoked.
class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
	void Execute(DrawerThread *thread) override
	{
		// Nothing to do when the thread reports args.y as skipped.
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args);
	}
};
// Drawer command for the add-clamp span variant. It inherits the argument
// setup from DrawSpanLLVMCommand and only changes which LLVMDrawers function
// pointer is invoked.
class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
	void Execute(DrawerThread *thread) override
	{
		// Nothing to do when the thread reports args.y as skipped.
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanAddClamp(&args);
	}
};
// Drawer command for the masked add-clamp span variant. It inherits the
// argument setup from DrawSpanLLVMCommand and only changes which LLVMDrawers
// function pointer is invoked.
class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
	void Execute(DrawerThread *thread) override
	{
		// Nothing to do when the thread reports args.y as skipped.
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args);
	}
};
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
class DrawerColumnCommand : public DrawerCommand class DrawerColumnCommand : public DrawerCommand
@ -2749,39 +2805,58 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
void R_DrawSpan_rgba() void R_DrawSpan_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>(); DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
/* #elif defined(NO_SSE)
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
#else #else
DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>(); DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>();
#endif #endif
*/
} }
void R_DrawSpanMasked_rgba() void R_DrawSpanMasked_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanMaskedLLVMCommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanMaskedRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanMaskedRGBACommand>();
#endif
} }
void R_DrawSpanTranslucent_rgba() void R_DrawSpanTranslucent_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanTranslucentLLVMCommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanTranslucentRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanTranslucentRGBACommand>();
#endif
} }
void R_DrawSpanMaskedTranslucent_rgba() void R_DrawSpanMaskedTranslucent_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentLLVMCommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentRGBACommand>();
#endif
} }
void R_DrawSpanAddClamp_rgba() void R_DrawSpanAddClamp_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanAddClampLLVMCommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanAddClampRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanAddClampRGBACommand>();
#endif
} }
void R_DrawSpanMaskedAddClamp_rgba() void R_DrawSpanMaskedAddClamp_rgba()
{ {
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampLLVMCommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampRGBACommand>();
#endif
} }
void R_FillSpan_rgba() void R_FillSpan_rgba()