Codegen all DrawSpan variants

This commit is contained in:
Magnus Norddahl 2016-09-29 04:01:42 +02:00
parent efd22346d8
commit e5f3c119cd
5 changed files with 193 additions and 51 deletions

View file

@ -11,7 +11,7 @@
#include "r_compiler/ssa/ssa_value.h"
#include "r_compiler/ssa/ssa_barycentric_weight.h"
void DrawSpanCodegen::Generate(SSAValue args)
void DrawSpanCodegen::Generate(DrawSpanVariant variant, SSAValue args)
{
destorg = args[0][0].load();
source = args[0][1].load();
@ -51,44 +51,44 @@ void DrawSpanCodegen::Generate(SSAValue args)
// 64x64 is the most common case by far, so special case it.
is_64x64 = xbits == 6 && ybits == 6;
is_simple_shade = (flags & RenderArgs::simple_shade) == RenderArgs::simple_shade;
is_nearest_filter = (flags & RenderArgs::nearest_filter) == RenderArgs::nearest_filter;
is_simple_shade = (flags & DrawSpanArgs::simple_shade) == DrawSpanArgs::simple_shade;
is_nearest_filter = (flags & DrawSpanArgs::nearest_filter) == DrawSpanArgs::nearest_filter;
SSAIfBlock branch;
branch.if_block(is_simple_shade);
LoopShade(true);
LoopShade(variant, true);
branch.else_block();
LoopShade(false);
LoopShade(variant, false);
branch.end_block();
}
void DrawSpanCodegen::LoopShade(bool isSimpleShade)
void DrawSpanCodegen::LoopShade(DrawSpanVariant variant, bool isSimpleShade)
{
SSAIfBlock branch;
branch.if_block(is_nearest_filter);
LoopFilter(isSimpleShade, true);
LoopFilter(variant, isSimpleShade, true);
branch.else_block();
LoopFilter(isSimpleShade, false);
LoopFilter(variant, isSimpleShade, false);
branch.end_block();
}
void DrawSpanCodegen::LoopFilter(bool isSimpleShade, bool isNearestFilter)
void DrawSpanCodegen::LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter)
{
SSAIfBlock branch;
branch.if_block(is_64x64);
{
SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, true);
Loop(sseLength * 4, isSimpleShade, isNearestFilter, true);
SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, true);
Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, true);
}
branch.else_block();
{
SSAInt sseLength = Loop4x(isSimpleShade, isNearestFilter, false);
Loop(sseLength * 4, isSimpleShade, isNearestFilter, false);
SSAInt sseLength = Loop4x(variant, isSimpleShade, isNearestFilter, false);
Loop(sseLength * 4, variant, isSimpleShade, isNearestFilter, false);
}
branch.end_block();
}
SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64)
SSAInt DrawSpanCodegen::Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{
SSAInt sseLength = count / 4;
stack_index.store(0);
@ -97,24 +97,31 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is
SSAInt index = stack_index.load();
loop.loop_block(index < sseLength);
SSAVec16ub bg = data[index * 16].load_unaligned_vec16ub();
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
SSAVec4i bgcolors[4] =
{
SSAVec4i::extendlo(bg0),
SSAVec4i::extendhi(bg0),
SSAVec4i::extendlo(bg1),
SSAVec4i::extendhi(bg1)
};
SSAVec4i colors[4];
for (int i = 0; i < 4; i++)
{
SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load();
SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64);
if (isSimpleShade)
colors[i] = shade_bgra_simple(fg, light);
else
colors[i] = shade_bgra_advanced(fg, light, shade_constants);
colors[i] = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolors[i], variant);
stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep);
}
SSAVec16ub ssecolors(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
data[index * 16].store_unaligned_vec16ub(ssecolors);
SSAVec16ub color(SSAVec8s(colors[0], colors[1]), SSAVec8s(colors[2], colors[3]));
data[index * 16].store_unaligned_vec16ub(color);
stack_index.store(index + 1);
loop.end_block();
@ -122,7 +129,7 @@ SSAInt DrawSpanCodegen::Loop4x(bool isSimpleShade, bool isNearestFilter, bool is
return sseLength;
}
void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64)
void DrawSpanCodegen::Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64)
{
stack_index.store(start);
{
@ -133,13 +140,8 @@ void DrawSpanCodegen::Loop(SSAInt start, bool isSimpleShade, bool isNearestFilte
SSAInt xfrac = stack_xfrac.load();
SSAInt yfrac = stack_yfrac.load();
SSAVec4i fg = Sample(xfrac, yfrac, isNearestFilter, is64x64);
SSAVec4i color;
if (isSimpleShade)
color = shade_bgra_simple(fg, light);
else
color = shade_bgra_advanced(fg, light, shade_constants);
SSAVec4i bgcolor = data[index * 4].load_vec4ub();
SSAVec4i color = Blend(Shade(Sample(xfrac, yfrac, isNearestFilter, is64x64), isSimpleShade), bgcolor, variant);
data[index * 4].store_vec4ub(color);
stack_index.store(index + 1);
@ -173,6 +175,32 @@ SSAVec4i DrawSpanCodegen::Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilte
}
}
// Applies light shading to a sampled texel.
// isSimpleShade is an ordinary C++ bool, so exactly one of the two shading
// helpers is invoked per call: shade_bgra_simple(fg, light) when it is true,
// shade_bgra_advanced(fg, light, shade_constants) otherwise.
SSAVec4i DrawSpanCodegen::Shade(SSAVec4i fg, bool isSimpleShade)
{
	if (!isSimpleShade)
		return shade_bgra_advanced(fg, light, shade_constants);

	return shade_bgra_simple(fg, light);
}
// Combines a shaded foreground pixel (fg) with the pixel already in the
// destination (bg) according to the requested span variant.
// Unknown variant values are treated the same as Opaque.
SSAVec4i DrawSpanCodegen::Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant)
{
	switch (variant)
	{
	case DrawSpanVariant::Masked:
		return blend_alpha_blend(fg, bg);

	case DrawSpanVariant::Translucent:
	case DrawSpanVariant::AddClamp:
		// Both variants use the same additive blend with the span's src/dest alpha.
		return blend_add(fg, bg, srcalpha, destalpha);

	case DrawSpanVariant::MaskedTranslucent:
	case DrawSpanVariant::MaskedAddClamp:
	{
		// Masked variants replace destalpha with one derived from the texel's alpha.
		SSAInt bgalpha = calc_blend_bgalpha(fg, destalpha);
		return blend_add(fg, bg, srcalpha, bgalpha);
	}

	case DrawSpanVariant::Opaque:
	default:
		return blend_copy(fg);
	}
}
/////////////////////////////////////////////////////////////////////////////
SSAInt DrawerCodegen::calc_light_multiplier(SSAInt light)
@ -249,6 +277,14 @@ SSAVec4i DrawerCodegen::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg)
return color.insert(3, 255);
}
// Computes the background alpha to use when the foreground's own alpha
// channel (fg[3]) takes part in the blend.
// The result is a weighted mix, rounded and scaled back by 8 bits:
//   (destalpha * alpha + 256 * (256 - alpha) + 128) >> 8
// NOTE(review): presumably fg[3] is an 8-bit alpha (0..255), in which case
// "alpha + (alpha >> 7)" stretches it to 0..256 - confirm against callers.
SSAInt DrawerCodegen::calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha)
{
	SSAInt texel_alpha = fg[3];
	SSAInt alpha = texel_alpha + (texel_alpha >> 7);
	SSAInt inv_alpha = 256 - alpha;

	SSAInt dest_term = destalpha * alpha;
	SSAInt full_term = 256 * inv_alpha;
	return (dest_term + full_term + 128) >> 8;
}
SSAVec4i DrawerCodegen::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;

View file

@ -43,22 +43,37 @@ public:
SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg);
// Calculates the background (destination) alpha to use once the source texel's alpha channel is taken into account
SSAInt calc_blend_bgalpha(SSAVec4i fg, SSAInt destalpha);
// SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
};
// Selects which blend step DrawSpanCodegen::Blend applies to each pixel.
// The numeric values match the implicit ones (declaration order starting at 0).
enum class DrawSpanVariant
{
	Opaque = 0,            // blend_copy(fg)
	Masked = 1,            // blend_alpha_blend(fg, bg)
	Translucent = 2,       // blend_add(fg, bg, srcalpha, destalpha)
	MaskedTranslucent = 3, // blend_add with destalpha from calc_blend_bgalpha
	AddClamp = 4,          // blend_add(fg, bg, srcalpha, destalpha)
	MaskedAddClamp = 5     // blend_add with destalpha from calc_blend_bgalpha
};
class DrawSpanCodegen : public DrawerCodegen
{
public:
void Generate(SSAValue args);
void Generate(DrawSpanVariant variant, SSAValue args);
private:
void LoopShade(bool isSimpleShade);
void LoopFilter(bool isSimpleShade, bool isNearestFilter);
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64);
void LoopShade(DrawSpanVariant variant, bool isSimpleShade);
void LoopFilter(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter);
SSAInt Loop4x(DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, DrawSpanVariant variant, bool isSimpleShade, bool isNearestFilter, bool is64x64);
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
SSAVec4i Shade(SSAVec4i fg, bool isSimpleShade);
SSAVec4i Blend(SSAVec4i fg, SSAVec4i bg, DrawSpanVariant variant);
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;

View file

@ -44,8 +44,8 @@ public:
LLVMDrawersImpl();
private:
void CodegenDrawSpan();
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context);
LLVMProgram mProgram;
};
@ -75,26 +75,37 @@ LLVMDrawers *LLVMDrawers::Instance()
LLVMDrawersImpl::LLVMDrawersImpl()
{
CodegenDrawSpan();
CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque);
CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked);
CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent);
CodegenDrawSpan("DrawSpanMaskedTranslucent", DrawSpanVariant::MaskedTranslucent);
CodegenDrawSpan("DrawSpanAddClamp", DrawSpanVariant::AddClamp);
CodegenDrawSpan("DrawSpanMaskedAddClamp", DrawSpanVariant::MaskedAddClamp);
mProgram.engine()->finalizeObject();
mProgram.modulePassManager()->run(*mProgram.module());
DrawSpan = mProgram.GetProcAddress<void(const RenderArgs *)>("DrawSpan");
DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan");
DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked");
DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent");
DrawSpanMaskedTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedTranslucent");
DrawSpanAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanAddClamp");
DrawSpanMaskedAddClamp = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMaskedAddClamp");
mProgram.StopLogFatalErrors();
}
void LLVMDrawersImpl::CodegenDrawSpan()
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
{
llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
SSAFunction function("DrawSpan");
function.add_parameter(GetRenderArgsStruct(mProgram.context()));
SSAFunction function(name);
function.add_parameter(GetDrawSpanArgsStruct(mProgram.context()));
function.create_public();
DrawSpanCodegen codegen;
codegen.Generate(function.parameter(0));
codegen.Generate(variant, function.parameter(0));
builder.CreateRetVoid();
@ -104,7 +115,7 @@ void LLVMDrawersImpl::CodegenDrawSpan()
mProgram.functionPassManager()->run(*function.func);
}
llvm::Type *LLVMDrawersImpl::GetRenderArgsStruct(llvm::LLVMContext &context)
llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
{
std::vector<llvm::Type *> elements;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *destorg;

View file

@ -1,7 +1,7 @@
#pragma once
struct RenderArgs
struct DrawSpanArgs
{
uint32_t *destorg;
const uint32_t *source;
@ -45,7 +45,12 @@ public:
static void Destroy();
static LLVMDrawers *Instance();
void(*DrawSpan)(const RenderArgs *) = nullptr;
void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedTranslucent)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanAddClamp)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMaskedAddClamp)(const DrawSpanArgs *) = nullptr;
private:
static LLVMDrawers *Singleton;

View file

@ -302,7 +302,8 @@ void DrawerCommandQueue::StopThreads()
class DrawSpanLLVMCommand : public DrawerCommand
{
RenderArgs args;
protected:
DrawSpanArgs args;
public:
DrawSpanLLVMCommand()
@ -333,9 +334,9 @@ public:
args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.flags = 0;
if (ds_shade_constants.simple_shade)
args.flags |= RenderArgs::simple_shade;
args.flags |= DrawSpanArgs::simple_shade;
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
args.flags |= RenderArgs::nearest_filter;
args.flags |= DrawSpanArgs::nearest_filter;
}
void Execute(DrawerThread *thread) override
@ -346,6 +347,61 @@ public:
}
};
// Span command that reuses DrawSpanLLVMCommand's args and dispatches to the
// DrawSpanMasked entry point of LLVMDrawers.
class DrawSpanMaskedLLVMCommand : public DrawSpanLLVMCommand
{
public:
	// Calls DrawSpanMasked with the inherited args unless the thread reports
	// that it skips args.y.
	void Execute(DrawerThread *thread) override
	{
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMasked(&args);
	}
};
// Span command that reuses DrawSpanLLVMCommand's args and dispatches to the
// DrawSpanTranslucent entry point of LLVMDrawers.
class DrawSpanTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
	// Calls DrawSpanTranslucent with the inherited args unless the thread
	// reports that it skips args.y.
	void Execute(DrawerThread *thread) override
	{
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanTranslucent(&args);
	}
};
// Span command that reuses DrawSpanLLVMCommand's args and dispatches to the
// DrawSpanMaskedTranslucent entry point of LLVMDrawers.
class DrawSpanMaskedTranslucentLLVMCommand : public DrawSpanLLVMCommand
{
public:
	// Calls DrawSpanMaskedTranslucent with the inherited args unless the
	// thread reports that it skips args.y.
	void Execute(DrawerThread *thread) override
	{
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMaskedTranslucent(&args);
	}
};
// Span command that reuses DrawSpanLLVMCommand's args and dispatches to the
// DrawSpanAddClamp entry point of LLVMDrawers.
class DrawSpanAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
	// Calls DrawSpanAddClamp with the inherited args unless the thread
	// reports that it skips args.y.
	void Execute(DrawerThread *thread) override
	{
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanAddClamp(&args);
	}
};
// Span command that reuses DrawSpanLLVMCommand's args and dispatches to the
// DrawSpanMaskedAddClamp entry point of LLVMDrawers.
class DrawSpanMaskedAddClampLLVMCommand : public DrawSpanLLVMCommand
{
public:
	// Calls DrawSpanMaskedAddClamp with the inherited args unless the thread
	// reports that it skips args.y.
	void Execute(DrawerThread *thread) override
	{
		if (!thread->skipped_by_thread(args.y))
			LLVMDrawers::Instance()->DrawSpanMaskedAddClamp(&args);
	}
};
/////////////////////////////////////////////////////////////////////////////
class DrawerColumnCommand : public DrawerCommand
@ -2749,39 +2805,58 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
void R_DrawSpan_rgba()
{
#if !defined(NO_LLVM)
DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
/*
#ifdef NO_SSE
#elif defined(NO_SSE)
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
#else
DrawerCommandQueue::QueueCommand<DrawSpanRGBA_SSE_Command>();
#endif
*/
}
// Queues a masked span draw command: the LLVM-backed command by default,
// or DrawSpanMaskedRGBACommand when NO_LLVM is defined.
void R_DrawSpanMasked_rgba()
{
#ifdef NO_LLVM
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedRGBACommand>();
#else
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedLLVMCommand>();
#endif
}
// Queues a translucent span draw command: the LLVM-backed command by default,
// or DrawSpanTranslucentRGBACommand when NO_LLVM is defined.
void R_DrawSpanTranslucent_rgba()
{
#ifdef NO_LLVM
	DrawerCommandQueue::QueueCommand<DrawSpanTranslucentRGBACommand>();
#else
	DrawerCommandQueue::QueueCommand<DrawSpanTranslucentLLVMCommand>();
#endif
}
// Queues a masked translucent span draw command: the LLVM-backed command by
// default, or DrawSpanMaskedTranslucentRGBACommand when NO_LLVM is defined.
void R_DrawSpanMaskedTranslucent_rgba()
{
#ifdef NO_LLVM
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentRGBACommand>();
#else
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedTranslucentLLVMCommand>();
#endif
}
// Queues an add-clamp span draw command: the LLVM-backed command by default,
// or DrawSpanAddClampRGBACommand when NO_LLVM is defined.
void R_DrawSpanAddClamp_rgba()
{
#ifdef NO_LLVM
	DrawerCommandQueue::QueueCommand<DrawSpanAddClampRGBACommand>();
#else
	DrawerCommandQueue::QueueCommand<DrawSpanAddClampLLVMCommand>();
#endif
}
// Queues a masked add-clamp span draw command: the LLVM-backed command by
// default, or DrawSpanMaskedAddClampRGBACommand when NO_LLVM is defined.
void R_DrawSpanMaskedAddClamp_rgba()
{
#ifdef NO_LLVM
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampRGBACommand>();
#else
	DrawerCommandQueue::QueueCommand<DrawSpanMaskedAddClampLLVMCommand>();
#endif
}
void R_FillSpan_rgba()