Added codegen for column drawers

This commit is contained in:
Magnus Norddahl 2016-10-07 03:38:43 +02:00
parent e592473f57
commit c1e859dbca
10 changed files with 449 additions and 650 deletions

View file

@ -11,6 +11,151 @@
#include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_struct_type.h"
#include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_value.h"
// Emits the body of one column drawer function for the given variant.
//
// args        - SSA value for the drawer arguments; fields are read by index
//               below, in the same order as the members of DrawColumnArgs and
//               the elements built by GetDrawColumnArgsStruct.
// thread_data - SSA value for the worker thread description (core,
//               num_cores, pass_start_y, pass_end_y).
//
// After loading the inputs, the per-thread start/stride is applied and two
// copies of the pixel loop are emitted: one for the simple shading path and
// one for the advanced path, selected at run time by the simple_shade flag.
void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args) void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data)
{ {
// Pointer and scalar inputs (field indices 0-14).
dest = args[0][0].load();
source = args[0][1].load();
colormap = args[0][2].load();
translation = args[0][3].load();
basecolors = args[0][4].load();
pitch = args[0][5].load();
count = args[0][6].load();
dest_y = args[0][7].load();
iscale = args[0][8].load();
texturefrac = args[0][9].load();
light = args[0][10].load();
// The two packed 32-bit colors are expanded into 4-lane vectors.
color = SSAVec4i::unpack(args[0][11].load());
srccolor = SSAVec4i::unpack(args[0][12].load());
srcalpha = args[0][13].load();
destalpha = args[0][14].load();
// Shade constants are stored as 16-bit fields (indices 15-23).
SSAShort light_alpha = args[0][15].load();
SSAShort light_red = args[0][16].load();
SSAShort light_green = args[0][17].load();
SSAShort light_blue = args[0][18].load();
SSAShort fade_alpha = args[0][19].load();
SSAShort fade_red = args[0][20].load();
SSAShort fade_green = args[0][21].load();
SSAShort fade_blue = args[0][22].load();
SSAShort desaturate = args[0][23].load();
SSAInt flags = args[0][24].load();
// Vectors are assembled in blue, green, red, alpha lane order.
shade_constants.light = SSAVec4i(light_blue.zext_int(), light_green.zext_int(), light_red.zext_int(), light_alpha.zext_int());
shade_constants.fade = SSAVec4i(fade_blue.zext_int(), fade_green.zext_int(), fade_red.zext_int(), fade_alpha.zext_int());
shade_constants.desaturate = desaturate.zext_int();
thread.core = thread_data[0][0].load();
thread.num_cores = thread_data[0][1].load();
thread.pass_start_y = thread_data[0][2].load();
thread.pass_end_y = thread_data[0][3].load();
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
// Restrict the column to the rows handled by this thread: adjust the row
// count and start pointer, then step pitch and texture increment by
// num_cores. NOTE(review): count_for_thread / dest_for_thread /
// skipped_by_thread are defined elsewhere; this mirrors the arithmetic of the
// C++ LoopIterator in DrawerColumnCommand - confirm they match.
count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread);
pitch = pitch * thread.num_cores;
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
iscale = iscale * thread.num_cores;
// Emit both loop flavours; the generated code picks one at run time.
SSAIfBlock branch;
branch.if_block(is_simple_shade);
Loop(variant, true);
branch.else_block();
Loop(variant, false);
branch.end_block();
}
// Emits the per-pixel loop for one drawer variant.
//
// variant       - selects which sampling/blending code is emitted. The switch
//                 below runs in the code generator, so only the chosen case
//                 ends up in the generated function.
// isSimpleShade - forwarded to Shade() to pick the shading helper.
//
// stack_index holds the loop counter and stack_frac the texture coordinate;
// both are loaded at the top of each iteration and stored back at the bottom.
void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade)
{
stack_index.store(SSAInt(0));
{
SSAForBlock loop;
SSAInt index = stack_index.load();
loop.loop_block(index < count);
SSAInt frac = stack_frac.load();
// dest is a byte pointer; each pixel is read and written as 4 bytes.
SSAInt offset = index * pitch * 4;
SSAVec4i bgcolor = dest[offset].load_vec4ub();
SSAInt alpha, inv_alpha;
SSAVec4i outcolor;
switch (variant)
{
default:
case DrawColumnVariant::Draw:
outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade));
break;
case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp:
outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawShaded:
// The colormapped sample is used as coverage: clamp to 0..64, scale to 0..256.
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
outcolor = blend_add(color, bgcolor, alpha, inv_alpha);
break;
case DrawColumnVariant::DrawSubClamp:
outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawRevSubClamp:
outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawTranslated:
outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade));
break;
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawSubClampTranslated:
outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawRevSubClampTranslated:
outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::Fill:
outcolor = blend_copy(color);
break;
case DrawColumnVariant::FillAdd:
// Alpha comes from lane 3 of srccolor; adding (alpha >> 7) maps 255 to 256.
alpha = srccolor[3];
alpha = alpha + (alpha >> 7);
inv_alpha = 256 - alpha;
outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha);
break;
case DrawColumnVariant::FillAddClamp:
outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::FillSubClamp:
outcolor = blend_sub(srccolor, bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::FillRevSubClamp:
outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
break;
}
dest[offset].store_vec4ub(outcolor);
// Advance the counter and the texture coordinate for the next iteration.
stack_index.store(index + 1);
stack_frac.store(frac + iscale);
loop.end_block();
}
}
// Emits code that reads the source texel addressed by the integer part of
// `frac` and maps it through `colormap`. Returns the mapped value widened to
// a 32-bit integer.
SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac)
{
	SSAInt texel_pos = frac >> FRACBITS;
	SSAInt texel = source[texel_pos].load().zext_int();
	SSAInt mapped = colormap[texel].load().zext_int();
	return mapped;
}
// Emits code that reads the source texel addressed by the integer part of
// `frac` and maps it through `translation`. Returns the mapped value widened
// to a 32-bit integer.
SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac)
{
	SSAInt texel_pos = frac >> FRACBITS;
	SSAInt texel = source[texel_pos].load().zext_int();
	SSAInt remapped = translation[texel].load().zext_int();
	return remapped;
}
// Emits the shading code for a palette index and returns the resulting color
// vector. isSimpleShade is a code-generation-time choice: true uses
// shade_pal_index_simple (light and basecolors only), false uses
// shade_pal_index_advanced, which additionally takes shade_constants.
SSAVec4i DrawColumnCodegen::Shade(SSAInt palIndex, bool isSimpleShade)
{
if (isSimpleShade)
return shade_pal_index_simple(palIndex, light, basecolors);
else
return shade_pal_index_advanced(palIndex, light, shade_constants, basecolors);
} }

View file

@ -5,22 +5,53 @@
enum class DrawColumnVariant enum class DrawColumnVariant
{ {
Opaque, Fill,
Fuzz, FillAdd,
Add, FillAddClamp,
Translated, FillSubClamp,
TlatedAdd, FillRevSubClamp,
Shaded, Draw,
AddClamp, DrawAdd,
AddClampTranslated, DrawTranslated,
SubClamp, DrawTlatedAdd,
SubClampTranslated, DrawShaded,
RevSubClamp, DrawAddClamp,
RevSubClampTranslated DrawAddClampTranslated,
DrawSubClamp,
DrawSubClampTranslated,
DrawRevSubClamp,
DrawRevSubClampTranslated
}; };
class DrawColumnCodegen : public DrawerCodegen class DrawColumnCodegen : public DrawerCodegen
{ {
public: public:
void Generate(DrawColumnVariant variant, SSAValue args); void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data);
private:
void Loop(DrawColumnVariant variant, bool isSimpleShade);
SSAInt ColormapSample(SSAInt frac);
SSAInt TranslateSample(SSAInt frac);
SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade);
SSAStack<SSAInt> stack_index, stack_frac;
SSAUBytePtr dest;
SSAUBytePtr source;
SSAUBytePtr colormap;
SSAUBytePtr translation;
SSAUBytePtr basecolors;
SSAInt pitch;
SSAInt count;
SSAInt dest_y;
SSAInt iscale;
SSAInt texturefrac;
SSAInt light;
SSAVec4i color;
SSAVec4i srccolor;
SSAInt srcalpha;
SSAInt destalpha;
SSABool is_simple_shade;
SSAShadeConstants shade_constants;
SSAWorkerThread thread;
}; };

View file

@ -47,9 +47,11 @@ public:
LLVMDrawersImpl(); LLVMDrawersImpl();
private: private:
void CodegenDrawColumn(const char *name, DrawColumnVariant variant);
void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns);
static llvm::Type *GetDrawColumnArgsStruct(llvm::LLVMContext &context);
static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawSpanArgsStruct(llvm::LLVMContext &context);
static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetDrawWallArgsStruct(llvm::LLVMContext &context);
static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context); static llvm::Type *GetWorkerThreadDataStruct(llvm::LLVMContext &context);
@ -82,6 +84,22 @@ LLVMDrawers *LLVMDrawers::Instance()
LLVMDrawersImpl::LLVMDrawersImpl() LLVMDrawersImpl::LLVMDrawersImpl()
{ {
CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill);
CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd);
CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp);
CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp);
CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp);
CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw);
CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd);
CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated);
CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd);
CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded);
CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp);
CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated);
CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp);
CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated);
CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp);
CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated);
CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque);
CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked);
CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent);
@ -104,6 +122,22 @@ LLVMDrawersImpl::LLVMDrawersImpl()
mProgram.engine()->finalizeObject(); mProgram.engine()->finalizeObject();
mProgram.modulePassManager()->run(*mProgram.module()); mProgram.modulePassManager()->run(*mProgram.module());
FillColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumn");
FillColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnAdd");
FillColumnAddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnAddClamp");
FillColumnSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnSubClamp");
FillColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnRevSubClamp");
DrawColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumn");
DrawColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAdd");
DrawColumnTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTranslated");
DrawColumnTlatedAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTlatedAdd");
DrawColumnShaded = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnShaded");
DrawColumnAddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClamp");
DrawColumnAddClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClampTranslated");
DrawColumnSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClamp");
DrawColumnSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClampTranslated");
DrawColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClamp");
DrawColumnRevSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClampTranslated");
DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan"); DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan");
DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked"); DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked");
DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent"); DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent");
@ -126,6 +160,27 @@ LLVMDrawersImpl::LLVMDrawersImpl()
mProgram.StopLogFatalErrors(); mProgram.StopLogFatalErrors();
} }
// Generates one column drawer function in the LLVM module.
//
// name    - symbol name of the generated function; the same string is later
//           passed to GetProcAddress to obtain the compiled entry point.
// variant - which column drawer body DrawColumnCodegen should emit.
//
// The generated function takes two parameters: the drawer arguments and the
// worker thread data. It is verified and then run through the function pass
// manager. A verification failure is fatal.
void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant)
{
	llvm::IRBuilder<> builder(mProgram.context());
	SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);

	SSAFunction function(name);
	function.add_parameter(GetDrawColumnArgsStruct(mProgram.context()));
	function.add_parameter(GetWorkerThreadDataStruct(mProgram.context()));
	function.create_public();

	DrawColumnCodegen codegen;
	codegen.Generate(variant, function.parameter(0), function.parameter(1));

	builder.CreateRetVoid();

	// Report the generated function's name. Concatenating __FUNCTION__ to a
	// string literal only compiles where it is a literal (MSVC); on GCC/Clang
	// it is a variable. It would also name this helper, not the drawer.
	if (llvm::verifyFunction(*function.func))
		I_FatalError("verifyFunction failed for %s", name);

	mProgram.functionPassManager()->run(*function.func);
}
void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant) void LLVMDrawersImpl::CodegenDrawSpan(const char *name, DrawSpanVariant variant)
{ {
llvm::IRBuilder<> builder(mProgram.context()); llvm::IRBuilder<> builder(mProgram.context());
@ -167,6 +222,37 @@ void LLVMDrawersImpl::CodegenDrawWall(const char *name, DrawWallVariant variant,
mProgram.functionPassManager()->run(*function.func); mProgram.functionPassManager()->run(*function.func);
} }
// Builds the LLVM type used for the first parameter of the generated column
// drawers: a pointer to a struct whose elements follow the member order of
// the C++ DrawColumnArgs struct. Every pointer member is declared as i8*
// regardless of its C++ pointee type. The trailing comment on each line names
// the C++ member the element corresponds to.
llvm::Type *LLVMDrawersImpl::GetDrawColumnArgsStruct(llvm::LLVMContext &context)
{
std::vector<llvm::Type *> elements;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint32_t *dest;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint8_t *source;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *colormap;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // uint8_t *translation;
elements.push_back(llvm::Type::getInt8PtrTy(context)); // const uint32_t *basecolors;
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t pitch;
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t count;
elements.push_back(llvm::Type::getInt32Ty(context)); // int32_t dest_y;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t iscale;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t texturefrac;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t light;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t color;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srccolor;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t srcalpha;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t destalpha;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_alpha;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_red;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_green;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t light_blue;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_alpha;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_red;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_green;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t fade_blue;
elements.push_back(llvm::Type::getInt16Ty(context)); // uint16_t desaturate;
elements.push_back(llvm::Type::getInt32Ty(context)); // uint32_t flags;
// Third argument is isPacked = false. NOTE(review): the padding LLVM inserts
// (e.g. before flags) must match the C++ compiler's layout - confirm.
return llvm::StructType::get(context, elements, false)->getPointerTo();
}
llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context) llvm::Type *LLVMDrawersImpl::GetDrawSpanArgsStruct(llvm::LLVMContext &context)
{ {
std::vector<llvm::Type *> elements; std::vector<llvm::Type *> elements;

View file

@ -77,6 +77,40 @@ struct DrawSpanArgs
}; };
}; };
// Argument block passed by pointer to the generated column drawers.
// The member order is significant: the code generator reads these fields by
// index and LLVMDrawersImpl::GetDrawColumnArgsStruct declares the matching
// LLVM struct, so any change here must be mirrored in both places.
// DrawColumnLLVMCommand fills this in from the dc_* drawer globals.
struct DrawColumnArgs
{
uint32_t *dest; // first destination pixel of the column
const uint8_t *source; // source texels, one byte each
uint8_t *colormap; // table indexed by source texel
uint8_t *translation; // table indexed by source texel (translated variants)
const uint32_t *basecolors; // palette colors used by the shading helpers
int32_t pitch;
int32_t count;
int32_t dest_y;
uint32_t iscale; // texture coordinate step per row
uint32_t texturefrac; // starting texture coordinate
uint32_t light; // light multiplier
uint32_t color; // pre-shaded fill color
uint32_t srccolor;
uint32_t srcalpha;
uint32_t destalpha;
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
uint32_t flags; // bitwise OR of Flags values
enum Flags
{
simple_shade = 1 // set when the simple shading path may be used
};
};
class LLVMDrawers class LLVMDrawers
{ {
public: public:
@ -86,6 +120,23 @@ public:
static void Destroy(); static void Destroy();
static LLVMDrawers *Instance(); static LLVMDrawers *Instance();
void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr; void(*DrawSpanTranslucent)(const DrawSpanArgs *) = nullptr;

View file

@ -1,6 +1,7 @@
#include "r_compiler/llvm_include.h" #include "r_compiler/llvm_include.h"
#include "ssa_ubyte.h" #include "ssa_ubyte.h"
#include "ssa_int.h"
#include "ssa_scope.h" #include "ssa_scope.h"
SSAUByte::SSAUByte() SSAUByte::SSAUByte()
@ -24,6 +25,11 @@ llvm::Type *SSAUByte::llvm_type()
return llvm::Type::getInt8Ty(SSAScope::context()); return llvm::Type::getInt8Ty(SSAScope::context());
} }
// Emits a zero-extension of this 8-bit value to the SSAInt type and returns
// the widened value.
SSAInt SSAUByte::zext_int()
{
	llvm::Value *widened = SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint());
	return SSAInt::from_llvm(widened);
}
SSAUByte operator+(const SSAUByte &a, const SSAUByte &b) SSAUByte operator+(const SSAUByte &a, const SSAUByte &b)
{ {
return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); return SSAUByte::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View file

@ -4,6 +4,8 @@
namespace llvm { class Value; } namespace llvm { class Value; }
namespace llvm { class Type; } namespace llvm { class Type; }
class SSAInt;
class SSAUByte class SSAUByte
{ {
public: public:
@ -13,6 +15,8 @@ public:
static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); } static SSAUByte from_llvm(llvm::Value *v) { return SSAUByte(v); }
static llvm::Type *llvm_type(); static llvm::Type *llvm_type();
SSAInt zext_int();
llvm::Value *v; llvm::Value *v;
}; };

View file

@ -30,22 +30,8 @@ SSAUByte SSAUBytePtr::load() const
SSAVec4i SSAUBytePtr::load_vec4ub() const SSAVec4i SSAUBytePtr::load_vec4ub() const
{ {
// _mm_cvtsi32_si128 as implemented by clang:
SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint())); SSAInt i32 = SSAInt::from_llvm(SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, llvm::Type::getInt32PtrTy(SSAScope::context()), SSAScope::hint()), false, SSAScope::hint()));
llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint()); return SSAVec4i::unpack(i32);
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint());
SSAVec4i v4i = SSAVec4i::from_llvm(v);
SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7)); // _mm_unpacklo_epi8
return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16
/*
llvm::PointerType *m4xint8typeptr = llvm::VectorType::get(llvm::Type::getInt8Ty(SSAScope::context()), 4)->getPointerTo();
llvm::Type *m4xint32type = llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4);
llvm::Value *v4ub = SSAScope::builder().CreateLoad(SSAScope::builder().CreateBitCast(v, m4xint8typeptr, SSAScope::hint()), false, SSAScope::hint());
return SSAVec4i::from_llvm(SSAScope::builder().CreateZExt(v4ub, m4xint32type));
*/
} }
SSAVec16ub SSAUBytePtr::load_vec16ub() const SSAVec16ub SSAUBytePtr::load_vec16ub() const

View file

@ -97,6 +97,19 @@ llvm::Type *SSAVec4i::llvm_type()
return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4); return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4);
} }
// Expands a packed 32-bit value into a 4-lane integer vector.
// The value is placed in lane 0 of a vector whose other lanes are zero, its
// bytes are interleaved with zero bytes, and the low half of that result is
// widened by extendlo. Per the intrinsic names in the comments below, this
// should leave one source byte per 32-bit lane.
// NOTE(review): assumes extendlo zero-extends - confirm.
SSAVec4i SSAVec4i::unpack(SSAInt i32)
{
// _mm_cvtsi32_si128 as implemented by clang:
llvm::Value *v = SSAScope::builder().CreateInsertElement(llvm::UndefValue::get(SSAVec4i::llvm_type()), i32.v, SSAInt(0).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(1).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(2).v, SSAScope::hint());
v = SSAScope::builder().CreateInsertElement(v, SSAInt(0).v, SSAInt(3).v, SSAScope::hint());
SSAVec4i v4i = SSAVec4i::from_llvm(v);
// Shuffle indices 0..7 pick bytes of v4i; 16+k picks byte k of the all-zero vector.
SSAVec8s low = SSAVec8s::bitcast(SSAVec16ub::shuffle(SSAVec16ub::bitcast(v4i), SSAVec16ub((unsigned char)0), 0, 16 + 0, 1, 16 + 1, 2, 16 + 2, 3, 16 + 3, 4, 16 + 4, 5, 16 + 5, 6, 16 + 6, 7, 16 + 7)); // _mm_unpacklo_epi8
return SSAVec4i::extendlo(low); // _mm_unpacklo_epi16
}
SSAVec4i SSAVec4i::bitcast(SSAVec4f f32) SSAVec4i SSAVec4i::bitcast(SSAVec4f f32)
{ {
return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint())); return SSAVec4i::from_llvm(SSAScope::builder().CreateBitCast(f32.v, llvm_type(), SSAScope::hint()));

View file

@ -24,6 +24,7 @@ public:
SSAVec4i insert(SSAInt index, SSAInt value); SSAVec4i insert(SSAInt index, SSAInt value);
SSAVec4i insert(int index, SSAInt value); SSAVec4i insert(int index, SSAInt value);
SSAVec4i insert(int index, int value); SSAVec4i insert(int index, int value);
static SSAVec4i unpack(SSAInt value);
static SSAVec4i bitcast(SSAVec4f f32); static SSAVec4i bitcast(SSAVec4f f32);
static SSAVec4i bitcast(SSAVec8s i16); static SSAVec4i bitcast(SSAVec8s i16);
static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3); static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);

View file

@ -396,56 +396,6 @@ public:
} }
}; };
class DrawWallMasked4LLVMCommand : public DrawWall4LLVMCommand
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->mvlinec4(&args, &d);
}
};
class DrawWallAdd4LLVMCommand : public DrawWall4LLVMCommand
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->tmvline4_add(&args, &d);
}
};
class DrawWallAddClamp4LLVMCommand : public DrawWall4LLVMCommand
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->tmvline4_addclamp(&args, &d);
}
};
class DrawWallSubClamp4LLVMCommand : public DrawWall4LLVMCommand
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->tmvline4_subclamp(&args, &d);
}
};
class DrawWallRevSubClamp4LLVMCommand : public DrawWall4LLVMCommand
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->tmvline4_revsubclamp(&args, &d);
}
};
class DrawWall1LLVMCommand : public DrawerCommand class DrawWall1LLVMCommand : public DrawerCommand
{ {
protected: protected:
@ -500,575 +450,101 @@ public:
} }
}; };
class DrawWallMasked1LLVMCommand : public DrawWall1LLVMCommand class DrawColumnLLVMCommand : public DrawerCommand
{ {
protected:
DrawColumnArgs args;
WorkerThreadData ThreadData(DrawerThread *thread)
{
WorkerThreadData d;
d.core = thread->core;
d.num_cores = thread->num_cores;
d.pass_start_y = thread->pass_start_y;
d.pass_end_y = thread->pass_end_y;
return d;
}
public: public:
DrawColumnLLVMCommand()
{
args.dest = (uint32_t*)dc_dest;
args.source = dc_source;
args.colormap = dc_colormap;
args.translation = dc_translation;
args.basecolors = (const uint32_t *)GPalette.BaseColors;
args.pitch = dc_pitch;
args.count = dc_count;
args.dest_y = _dest_y;
args.iscale = dc_iscale;
args.texturefrac = dc_texturefrac;
args.light = LightBgra::calc_light_multiplier(dc_light);
args.color = LightBgra::shade_pal_index_simple(dc_color, args.light);
args.srccolor = dc_srccolor_bgra;
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.light_red = dc_shade_constants.light_red;
args.light_green = dc_shade_constants.light_green;
args.light_blue = dc_shade_constants.light_blue;
args.light_alpha = dc_shade_constants.light_alpha;
args.fade_red = dc_shade_constants.fade_red;
args.fade_green = dc_shade_constants.fade_green;
args.fade_blue = dc_shade_constants.fade_blue;
args.fade_alpha = dc_shade_constants.fade_alpha;
args.desaturate = dc_shade_constants.desaturate;
args.flags = 0;
if (dc_shade_constants.simple_shade)
args.flags |= DrawColumnArgs::simple_shade;
}
void Execute(DrawerThread *thread) override void Execute(DrawerThread *thread) override
{ {
WorkerThreadData d = ThreadData(thread); WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->mvlinec1(&args, &d); LLVMDrawers::Instance()->DrawColumn(&args, &d);
} }
}; };
class DrawWallAdd1LLVMCommand : public DrawWall1LLVMCommand #define DECLARE_DRAW_COMMAND(name, func, base) \
{ class name##LLVMCommand : public base \
public: { \
void Execute(DrawerThread *thread) override public: \
{ void Execute(DrawerThread *thread) override \
WorkerThreadData d = ThreadData(thread); { \
LLVMDrawers::Instance()->tmvline1_add(&args, &d); WorkerThreadData d = ThreadData(thread); \
} LLVMDrawers::Instance()->func(&args, &d); \
} \
}; };
class DrawWallAddClamp1LLVMCommand : public DrawWall1LLVMCommand //DECLARE_DRAW_COMMAND(name, func, DrawSpanLLVMCommand);
{
public:
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->tmvline1_addclamp(&args, &d);
}
};
class DrawWallSubClamp1LLVMCommand : public DrawWall1LLVMCommand DECLARE_DRAW_COMMAND(DrawWallMasked4, mvlinec4, DrawWall4LLVMCommand);
{ DECLARE_DRAW_COMMAND(DrawWallAdd4, tmvline4_add, DrawWall4LLVMCommand);
public: DECLARE_DRAW_COMMAND(DrawWallAddClamp4, tmvline4_addclamp, DrawWall4LLVMCommand);
void Execute(DrawerThread *thread) override DECLARE_DRAW_COMMAND(DrawWallSubClamp4, tmvline4_subclamp, DrawWall4LLVMCommand);
{ DECLARE_DRAW_COMMAND(DrawWallRevSubClamp4, tmvline4_revsubclamp, DrawWall4LLVMCommand);
WorkerThreadData d = ThreadData(thread); DECLARE_DRAW_COMMAND(DrawWallMasked1, mvlinec1, DrawWall1LLVMCommand);
LLVMDrawers::Instance()->tmvline1_subclamp(&args, &d); DECLARE_DRAW_COMMAND(DrawWallAdd1, tmvline1_add, DrawWall1LLVMCommand);
} DECLARE_DRAW_COMMAND(DrawWallAddClamp1, tmvline1_addclamp, DrawWall1LLVMCommand);
}; DECLARE_DRAW_COMMAND(DrawWallSubClamp1, tmvline1_subclamp, DrawWall1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawWallRevSubClamp1, tmvline1_revsubclamp, DrawWall1LLVMCommand);
class DrawWallRevSubClamp1LLVMCommand : public DrawWall1LLVMCommand DECLARE_DRAW_COMMAND(DrawColumnAdd, DrawColumnAdd, DrawColumnLLVMCommand);
{ DECLARE_DRAW_COMMAND(DrawColumnTranslated, DrawColumnTranslated, DrawColumnLLVMCommand);
public: DECLARE_DRAW_COMMAND(DrawColumnTlatedAdd, DrawColumnTlatedAdd, DrawColumnLLVMCommand);
void Execute(DrawerThread *thread) override DECLARE_DRAW_COMMAND(DrawColumnShaded, DrawColumnShaded, DrawColumnLLVMCommand);
{ DECLARE_DRAW_COMMAND(DrawColumnAddClamp, DrawColumnAddClamp, DrawColumnLLVMCommand);
WorkerThreadData d = ThreadData(thread); DECLARE_DRAW_COMMAND(DrawColumnAddClampTranslated, DrawColumnAddClampTranslated, DrawColumnLLVMCommand);
LLVMDrawers::Instance()->tmvline1_revsubclamp(&args, &d); DECLARE_DRAW_COMMAND(DrawColumnSubClamp, DrawColumnSubClamp, DrawColumnLLVMCommand);
} DECLARE_DRAW_COMMAND(DrawColumnSubClampTranslated, DrawColumnSubClampTranslated, DrawColumnLLVMCommand);
}; DECLARE_DRAW_COMMAND(DrawColumnRevSubClamp, DrawColumnRevSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRevSubClampTranslated, DrawColumnRevSubClampTranslated, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumn, FillColumn, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnAdd, FillColumnAdd, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnAddClamp, FillColumnAddClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnSubClamp, FillColumnSubClamp, DrawColumnLLVMCommand);
DECLARE_DRAW_COMMAND(FillColumnRevSubClamp, FillColumnRevSubClamp, DrawColumnLLVMCommand);
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
// Base class for the C++ column drawer commands. The constructor snapshots
// the dc_* globals that describe the column (row count, destination pointer,
// pitch, texture step and starting texture coordinate) so the command can be
// executed later.
class DrawerColumnCommand : public DrawerCommand
{
public:
	int _count;
	BYTE * RESTRICT _dest;
	int _pitch;
	DWORD _iscale;
	DWORD _texturefrac;

	DrawerColumnCommand()
	{
		_count = dc_count;
		_dest = dc_dest;
		_iscale = dc_iscale;
		_texturefrac = dc_texturefrac;
		_pitch = dc_pitch;
	}

	// Walks the rows of the column that belong to one worker thread.
	// Typical use: construct, bail out if the iterator converts to false,
	// then `do { ... *dest ... } while (next());`.
	class LoopIterator
	{
	public:
		// Default initializers keep every member well defined when the
		// constructor returns early because the thread has no rows to draw.
		int count = 0;
		uint32_t *dest = nullptr;
		int pitch = 0;
		fixed_t fracstep = 0;
		fixed_t frac = 0;

		LoopIterator(DrawerColumnCommand *command, DrawerThread *thread)
		{
			count = thread->count_for_thread(command->_dest_y, command->_count);
			if (count <= 0)
				return;

			dest = thread->dest_for_thread(command->_dest_y, command->_pitch, (uint32_t*)command->_dest);
			// Destination stride and texture step are scaled by num_cores.
			pitch = command->_pitch * thread->num_cores;
			fracstep = command->_iscale * thread->num_cores;
			frac = command->_texturefrac + command->_iscale * thread->skipped_by_thread(command->_dest_y);
		}

		// Integer part of the current texture coordinate.
		uint32_t sample_index()
		{
			return frac >> FRACBITS;
		}

		// True while there is at least one row left to draw.
		explicit operator bool()
		{
			return count > 0;
		}

		// Advances to the next row; returns false once the last row is done.
		bool next()
		{
			dest += pitch;
			frac += fracstep;
			return (--count) != 0;
		}
	};
};
class DrawColumnRGBACommand : public DrawerColumnCommand
{
uint32_t _light;
const BYTE * RESTRICT _source;
ShadeConstants _shade_constants;
BYTE * RESTRICT _colormap;
public:
DrawColumnRGBACommand()
{
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_source = dc_source;
_colormap = dc_colormap;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class FillColumnRGBACommand : public DrawerColumnCommand
{
uint32_t _color;
public:
FillColumnRGBACommand()
{
uint32_t light = LightBgra::calc_light_multiplier(dc_light);
_color = LightBgra::shade_pal_index_simple(dc_color, light);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
*loop.dest = BlendBgra::copy(_color);
} while (loop.next());
}
};
class FillAddColumnRGBACommand : public DrawerColumnCommand
{
uint32_t _srccolor;
public:
FillAddColumnRGBACommand()
{
_srccolor = dc_srccolor_bgra;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
uint32_t alpha = APART(_srccolor);
alpha += alpha >> 7;
do
{
*loop.dest = BlendBgra::add(_srccolor, *loop.dest, alpha, 256 - alpha);
} while (loop.next());
}
};
// Column command that blends dc_srccolor_bgra over the destination with BlendBgra::add,
// using the source/destination alpha pair captured from dc_srcalpha / dc_destalpha.
// (The previously stored copy of dc_color was never read and has been dropped, matching
// the sub/revsub fill commands.)
class FillAddClampColumnRGBACommand : public DrawerColumnCommand
{
	uint32_t _srccolor;
	uint32_t _srcalpha;
	uint32_t _destalpha;

public:
	FillAddClampColumnRGBACommand()
	{
		_srccolor = dc_srccolor_bgra;
		// Reduce the alphas from FRACBITS fractional bits to 8 fractional bits.
		_srcalpha = dc_srcalpha >> (FRACBITS - 8);
		_destalpha = dc_destalpha >> (FRACBITS - 8);
	}

	void Execute(DrawerThread *thread) override
	{
		LoopIterator loop(this, thread);
		if (!loop) return;
		do
		{
			*loop.dest = BlendBgra::add(_srccolor, *loop.dest, _srcalpha, _destalpha);
		} while (loop.next());
	}
};
class FillSubClampColumnRGBACommand : public DrawerColumnCommand
{
uint32_t _srccolor;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
FillSubClampColumnRGBACommand()
{
_srccolor = dc_srccolor_bgra;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
*loop.dest = BlendBgra::sub(_srccolor, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class FillRevSubClampColumnRGBACommand : public DrawerColumnCommand
{
uint32_t _srccolor;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
FillRevSubClampColumnRGBACommand()
{
_srccolor = dc_srccolor_bgra;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
*loop.dest = BlendBgra::revsub(_srccolor, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawAddColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
BYTE * RESTRICT _colormap;
public:
DrawAddColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
_colormap = dc_colormap;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_colormap[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawTranslatedColumnRGBACommand : public DrawerColumnCommand
{
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _translation;
const BYTE * RESTRICT _source;
public:
DrawTranslatedColumnRGBACommand()
{
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_translation = dc_translation;
_source = dc_source;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::copy(fg);
} while (loop.next());
}
};
class DrawTlatedAddColumnRGBACommand : public DrawerColumnCommand
{
fixed_t _light;
ShadeConstants _shade_constants;
BYTE * RESTRICT _translation;
const BYTE * RESTRICT _source;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
DrawTlatedAddColumnRGBACommand()
{
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_translation = dc_translation;
_source = dc_source;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawShadedColumnRGBACommand : public DrawerColumnCommand
{
private:
const BYTE * RESTRICT _source;
lighttable_t * RESTRICT _colormap;
uint32_t _color;
public:
DrawShadedColumnRGBACommand()
{
_source = dc_source;
_colormap = dc_colormap;
_color = LightBgra::shade_pal_index_simple(dc_color, LightBgra::calc_light_multiplier(dc_light));
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t alpha = clamp<uint32_t>(_colormap[_source[loop.sample_index()]], 0, 64) * 4;
uint32_t inv_alpha = 256 - alpha;
*loop.dest = BlendBgra::add(_color, *loop.dest, alpha, inv_alpha);
} while (loop.next());
}
};
class DrawAddClampColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
DrawAddClampColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawAddClampTranslatedColumnRGBACommand : public DrawerColumnCommand
{
BYTE * RESTRICT _translation;
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
DrawAddClampTranslatedColumnRGBACommand()
{
_translation = dc_translation;
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::add(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawSubClampColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
DrawSubClampColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants);
*loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
BYTE * RESTRICT _translation;
public:
DrawSubClampTranslatedColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
_translation = dc_translation;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::sub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawRevSubClampColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
public:
DrawRevSubClampColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_source[loop.sample_index()], _light, _shade_constants);
*loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawRevSubClampTranslatedColumnRGBACommand : public DrawerColumnCommand
{
const BYTE * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
uint32_t _srcalpha;
uint32_t _destalpha;
BYTE * RESTRICT _translation;
public:
DrawRevSubClampTranslatedColumnRGBACommand()
{
_source = dc_source;
_light = LightBgra::calc_light_multiplier(dc_light);
_shade_constants = dc_shade_constants;
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
_translation = dc_translation;
}
void Execute(DrawerThread *thread) override
{
LoopIterator loop(this, thread);
if (!loop) return;
do
{
uint32_t fg = LightBgra::shade_pal_index(_translation[_source[loop.sample_index()]], _light, _shade_constants);
*loop.dest = BlendBgra::revsub(fg, *loop.dest, _srcalpha, _destalpha);
} while (loop.next());
}
};
class DrawFuzzColumnRGBACommand : public DrawerCommand class DrawFuzzColumnRGBACommand : public DrawerCommand
{ {
int _x; int _x;
@ -1830,32 +1306,32 @@ void R_EndDrawerCommands()
// Hands a column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawColumnRGBACommand and DrawColumnLLVMCommand respectively — confirm which one applies.
void R_DrawColumn_rgba() void R_DrawColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnLLVMCommand>();
} }
// Hands a column fill command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name FillColumnRGBACommand and FillColumnLLVMCommand respectively — confirm which one applies.
void R_FillColumn_rgba() void R_FillColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<FillColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<FillColumnLLVMCommand>();
} }
// Hands an additive column fill command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name FillAddColumnRGBACommand and FillColumnAddLLVMCommand respectively — confirm which one applies.
void R_FillAddColumn_rgba() void R_FillAddColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<FillAddColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<FillColumnAddLLVMCommand>();
} }
// Hands an add-clamp column fill command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name FillAddClampColumnRGBACommand and FillColumnAddClampLLVMCommand respectively — confirm which one applies.
void R_FillAddClampColumn_rgba() void R_FillAddClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<FillAddClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<FillColumnAddClampLLVMCommand>();
} }
// Hands a sub-clamp column fill command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name FillSubClampColumnRGBACommand and FillColumnSubClampLLVMCommand respectively — confirm which one applies.
void R_FillSubClampColumn_rgba() void R_FillSubClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<FillSubClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<FillColumnSubClampLLVMCommand>();
} }
// Hands a reverse-sub-clamp column fill command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name FillRevSubClampColumnRGBACommand and FillColumnRevSubClampLLVMCommand respectively — confirm which one applies.
void R_FillRevSubClampColumn_rgba() void R_FillRevSubClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<FillRevSubClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<FillColumnRevSubClampLLVMCommand>();
} }
void R_DrawFuzzColumn_rgba() void R_DrawFuzzColumn_rgba()
@ -1870,52 +1346,52 @@ void R_DrawFuzzColumn_rgba()
// Hands an additive column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawAddColumnRGBACommand and DrawColumnAddLLVMCommand respectively — confirm which one applies.
void R_DrawAddColumn_rgba() void R_DrawAddColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawAddColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnAddLLVMCommand>();
} }
// Hands a translated column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawTranslatedColumnRGBACommand and DrawColumnTranslatedLLVMCommand respectively — confirm which one applies.
void R_DrawTranslatedColumn_rgba() void R_DrawTranslatedColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawTranslatedColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnTranslatedLLVMCommand>();
} }
// Hands a translated additive column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawTlatedAddColumnRGBACommand and DrawColumnTlatedAddLLVMCommand respectively — confirm which one applies.
void R_DrawTlatedAddColumn_rgba() void R_DrawTlatedAddColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawTlatedAddColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnTlatedAddLLVMCommand>();
} }
// Hands a shaded column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawShadedColumnRGBACommand and DrawColumnShadedLLVMCommand respectively — confirm which one applies.
void R_DrawShadedColumn_rgba() void R_DrawShadedColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawShadedColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnShadedLLVMCommand>();
} }
// Hands an add-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawAddClampColumnRGBACommand and DrawColumnAddClampLLVMCommand respectively — confirm which one applies.
void R_DrawAddClampColumn_rgba() void R_DrawAddClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawAddClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnAddClampLLVMCommand>();
} }
// Hands a translated add-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawAddClampTranslatedColumnRGBACommand and DrawColumnAddClampTranslatedLLVMCommand respectively — confirm which one applies.
void R_DrawAddClampTranslatedColumn_rgba() void R_DrawAddClampTranslatedColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawAddClampTranslatedColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnAddClampTranslatedLLVMCommand>();
} }
// Hands a sub-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawSubClampColumnRGBACommand and DrawColumnSubClampLLVMCommand respectively — confirm which one applies.
void R_DrawSubClampColumn_rgba() void R_DrawSubClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawSubClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnSubClampLLVMCommand>();
} }
// Hands a translated sub-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawSubClampTranslatedColumnRGBACommand and DrawColumnSubClampTranslatedLLVMCommand respectively — confirm which one applies.
void R_DrawSubClampTranslatedColumn_rgba() void R_DrawSubClampTranslatedColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawSubClampTranslatedColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnSubClampTranslatedLLVMCommand>();
} }
// Hands a reverse-sub-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawRevSubClampColumnRGBACommand and DrawColumnRevSubClampLLVMCommand respectively — confirm which one applies.
void R_DrawRevSubClampColumn_rgba() void R_DrawRevSubClampColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawRevSubClampColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnRevSubClampLLVMCommand>();
} }
// Hands a translated reverse-sub-clamp column draw command to DrawerCommandQueue::QueueCommand.
// NOTE(review): each line below holds two copies of its text (apparently the old and new sides of a diff);
// they name DrawRevSubClampTranslatedColumnRGBACommand and DrawColumnRevSubClampTranslatedLLVMCommand respectively — confirm which one applies.
void R_DrawRevSubClampTranslatedColumn_rgba() void R_DrawRevSubClampTranslatedColumn_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawRevSubClampTranslatedColumnRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawColumnRevSubClampTranslatedLLVMCommand>();
} }
void R_DrawSpan_rgba() void R_DrawSpan_rgba()