Add codegen for rt column drawers

This commit is contained in:
Magnus Norddahl 2016-10-07 06:40:29 +02:00
parent 584220edf0
commit 8c259f50b1
7 changed files with 292 additions and 137 deletions

View file

@ -11,7 +11,7 @@
#include "r_compiler/ssa/ssa_struct_type.h" #include "r_compiler/ssa/ssa_struct_type.h"
#include "r_compiler/ssa/ssa_value.h" #include "r_compiler/ssa/ssa_value.h"
void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data) void DrawColumnCodegen::Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data)
{ {
dest = args[0][0].load(); dest = args[0][0].load();
source = args[0][1].load(); source = args[0][1].load();
@ -21,6 +21,7 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa
pitch = args[0][5].load(); pitch = args[0][5].load();
count = args[0][6].load(); count = args[0][6].load();
dest_y = args[0][7].load(); dest_y = args[0][7].load();
if (method == DrawColumnMethod::Normal)
iscale = args[0][8].load(); iscale = args[0][8].load();
texturefrac = args[0][9].load(); texturefrac = args[0][9].load();
light = args[0][10].load(); light = args[0][10].load();
@ -46,109 +47,148 @@ void DrawColumnCodegen::Generate(DrawColumnVariant variant, SSAValue args, SSAVa
thread.num_cores = thread_data[0][1].load(); thread.num_cores = thread_data[0][1].load();
thread.pass_start_y = thread_data[0][2].load(); thread.pass_start_y = thread_data[0][2].load();
thread.pass_end_y = thread_data[0][3].load(); thread.pass_end_y = thread_data[0][3].load();
thread.temp = thread_data[0][4].load();
is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade); is_simple_shade = (flags & DrawColumnArgs::simple_shade) == SSAInt(DrawColumnArgs::simple_shade);
count = count_for_thread(dest_y, count, thread); count = count_for_thread(dest_y, count, thread);
dest = dest_for_thread(dest_y, pitch, dest, thread); dest = dest_for_thread(dest_y, pitch, dest, thread);
pitch = pitch * thread.num_cores; pitch = pitch * thread.num_cores;
if (method == DrawColumnMethod::Normal)
{
stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread)); stack_frac.store(texturefrac + iscale * skipped_by_thread(dest_y, thread));
iscale = iscale * thread.num_cores; iscale = iscale * thread.num_cores;
}
else
{
source = thread.temp[((dest_y + skipped_by_thread(dest_y, thread)) * 4 + texturefrac) * 4];
}
SSAIfBlock branch; SSAIfBlock branch;
branch.if_block(is_simple_shade); branch.if_block(is_simple_shade);
Loop(variant, true); Loop(variant, method, true);
branch.else_block(); branch.else_block();
Loop(variant, false); Loop(variant, method, false);
branch.end_block(); branch.end_block();
} }
void DrawColumnCodegen::Loop(DrawColumnVariant variant, bool isSimpleShade) void DrawColumnCodegen::Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade)
{ {
SSAInt sincr;
if (method != DrawColumnMethod::Normal)
sincr = thread.num_cores * 4;
stack_index.store(SSAInt(0)); stack_index.store(SSAInt(0));
{ {
SSAForBlock loop; SSAForBlock loop;
SSAInt index = stack_index.load(); SSAInt index = stack_index.load();
loop.loop_block(index < count); loop.loop_block(index < count);
SSAInt frac = stack_frac.load(); SSAInt sample_index, frac;
if (method == DrawColumnMethod::Normal)
SSAInt offset = index * pitch * 4;
SSAVec4i bgcolor = dest[offset].load_vec4ub();
SSAInt alpha, inv_alpha;
SSAVec4i outcolor;
switch (variant)
{ {
default: frac = stack_frac.load();
case DrawColumnVariant::Draw: sample_index = frac >> FRACBITS;
outcolor = blend_copy(Shade(ColormapSample(frac), isSimpleShade)); }
break; else
case DrawColumnVariant::DrawAdd: {
case DrawColumnVariant::DrawAddClamp: sample_index = index * sincr * 4;
outcolor = blend_add(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawShaded:
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(frac), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
outcolor = blend_add(color, bgcolor, alpha, inv_alpha);
break;
case DrawColumnVariant::DrawSubClamp:
outcolor = blend_sub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawRevSubClamp:
outcolor = blend_revsub(Shade(ColormapSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawTranslated:
outcolor = blend_copy(Shade(TranslateSample(frac), isSimpleShade));
break;
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
outcolor = blend_add(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawSubClampTranslated:
outcolor = blend_sub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::DrawRevSubClampTranslated:
outcolor = blend_revsub(Shade(TranslateSample(frac), isSimpleShade), bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::Fill:
outcolor = blend_copy(color);
break;
case DrawColumnVariant::FillAdd:
alpha = srccolor[3];
alpha = alpha + (alpha >> 7);
inv_alpha = 256 - alpha;
outcolor = blend_add(srccolor, bgcolor, alpha, inv_alpha);
break;
case DrawColumnVariant::FillAddClamp:
outcolor = blend_add(srccolor, bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::FillSubClamp:
outcolor = blend_sub(srccolor, bgcolor, srcalpha, destalpha);
break;
case DrawColumnVariant::FillRevSubClamp:
outcolor = blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
break;
} }
dest[offset].store_vec4ub(outcolor); SSAInt offset = index * pitch * 4;
SSAVec4i bgcolor[4];
int numColumns = (method == DrawColumnMethod::Rt4) ? 4 : 1;
if (numColumns == 4)
{
SSAVec16ub bg = dest[offset].load_unaligned_vec16ub();
SSAVec8s bg0 = SSAVec8s::extendlo(bg);
SSAVec8s bg1 = SSAVec8s::extendhi(bg);
bgcolor[0] = SSAVec4i::extendlo(bg0);
bgcolor[1] = SSAVec4i::extendhi(bg0);
bgcolor[2] = SSAVec4i::extendlo(bg1);
bgcolor[3] = SSAVec4i::extendhi(bg1);
}
else
{
bgcolor[0] = dest[offset].load_vec4ub();
}
SSAVec4i outcolor[4];
for (int i = 0; i < numColumns; i++)
outcolor[i] = ProcessPixel(sample_index + i * 4, bgcolor[i], variant, isSimpleShade);
if (numColumns == 4)
{
SSAVec16ub packedcolor(SSAVec8s(outcolor[0], outcolor[1]), SSAVec8s(outcolor[2], outcolor[3]));
dest[offset].store_unaligned_vec16ub(packedcolor);
}
else
{
dest[offset].store_vec4ub(outcolor[0]);
}
stack_index.store(index + 1); stack_index.store(index + 1);
if (method == DrawColumnMethod::Normal)
stack_frac.store(frac + iscale); stack_frac.store(frac + iscale);
loop.end_block(); loop.end_block();
} }
} }
// ProcessPixel: builds the output color for one pixel of the column.
//   sample_index  - index handed to ColormapSample()/TranslateSample(), which use it
//                   to read from `source`.
//   bgcolor       - color already in the destination; only the blending variants read it.
//   variant       - selects which blend expression is built. This is an ordinary C++
//                   switch, so exactly one case contributes to the returned value.
//   isSimpleShade - forwarded unchanged to Shade().
// Returns the blended color as an SSAVec4i.
SSAInt DrawColumnCodegen::ColormapSample(SSAInt frac) SSAVec4i DrawColumnCodegen::ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade)
{
SSAInt alpha, inv_alpha;
switch (variant)
{
// Unlisted variants share the DrawCopy path.
default:
// DrawCopy skips Shade(): the colormapped index is scaled by 4 and used to
// load a vec4ub directly from basecolors.
case DrawColumnVariant::DrawCopy:
return blend_copy(basecolors[ColormapSample(sample_index) * 4].load_vec4ub());
case DrawColumnVariant::Draw:
return blend_copy(Shade(ColormapSample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawAdd:
case DrawColumnVariant::DrawAddClamp:
return blend_add(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
// DrawShaded uses the sampled value as coverage rather than as a color:
// it is clamped to [0, 64] and multiplied by 4, giving alpha in [0, 256],
// and the fixed `color` is blended against bgcolor with that alpha.
case DrawColumnVariant::DrawShaded:
alpha = SSAInt::MAX(SSAInt::MIN(ColormapSample(sample_index), SSAInt(64)), SSAInt(0)) * 4;
inv_alpha = 256 - alpha;
return blend_add(color, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::DrawSubClamp:
return blend_sub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClamp:
return blend_revsub(Shade(ColormapSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
// The *Translated variants mirror the ones above but sample through
// TranslateSample() instead of ColormapSample().
case DrawColumnVariant::DrawTranslated:
return blend_copy(Shade(TranslateSample(sample_index), isSimpleShade));
case DrawColumnVariant::DrawTlatedAdd:
case DrawColumnVariant::DrawAddClampTranslated:
return blend_add(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawSubClampTranslated:
return blend_sub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
case DrawColumnVariant::DrawRevSubClampTranslated:
return blend_revsub(Shade(TranslateSample(sample_index), isSimpleShade), bgcolor, srcalpha, destalpha);
// The Fill* variants never sample `source`; they use `color` or `srccolor`.
case DrawColumnVariant::Fill:
return blend_copy(color);
case DrawColumnVariant::FillAdd:
alpha = srccolor[3];
// Adds 1 when alpha >= 128 -- presumably so that 255 becomes 256 (assumes
// srccolor[3] is in 0..255; TODO confirm).
alpha = alpha + (alpha >> 7);
inv_alpha = 256 - alpha;
return blend_add(srccolor, bgcolor, alpha, inv_alpha);
case DrawColumnVariant::FillAddClamp:
return blend_add(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillSubClamp:
return blend_sub(srccolor, bgcolor, srcalpha, destalpha);
case DrawColumnVariant::FillRevSubClamp:
return blend_revsub(srccolor, bgcolor, srcalpha, destalpha);
}
}
SSAInt DrawColumnCodegen::ColormapSample(SSAInt sample_index)
{ {
SSAInt sample_index = frac >> FRACBITS;
return colormap[source[sample_index].load().zext_int()].load().zext_int(); return colormap[source[sample_index].load().zext_int()].load().zext_int();
} }
SSAInt DrawColumnCodegen::TranslateSample(SSAInt frac) SSAInt DrawColumnCodegen::TranslateSample(SSAInt sample_index)
{ {
SSAInt sample_index = frac >> FRACBITS;
return translation[source[sample_index].load().zext_int()].load().zext_int(); return translation[source[sample_index].load().zext_int()].load().zext_int();
} }

View file

@ -10,6 +10,7 @@ enum class DrawColumnVariant
FillAddClamp, FillAddClamp,
FillSubClamp, FillSubClamp,
FillRevSubClamp, FillRevSubClamp,
DrawCopy,
Draw, Draw,
DrawAdd, DrawAdd,
DrawTranslated, DrawTranslated,
@ -23,13 +24,21 @@ enum class DrawColumnVariant
DrawRevSubClampTranslated DrawRevSubClampTranslated
}; };
// Selects where a generated column drawer reads its source pixels from and
// how many columns it handles per loop iteration.
enum class DrawColumnMethod
{
// Steps through `source` with texturefrac/iscale; the only method that
// loads iscale and maintains stack_frac.
Normal,
// Reads from the worker thread's temp buffer (thread.temp); one column
// per iteration.
Rt1,
// Reads from the worker thread's temp buffer; four columns are loaded,
// processed and stored per iteration.
Rt4
};
class DrawColumnCodegen : public DrawerCodegen class DrawColumnCodegen : public DrawerCodegen
{ {
public: public:
void Generate(DrawColumnVariant variant, SSAValue args, SSAValue thread_data); void Generate(DrawColumnVariant variant, DrawColumnMethod method, SSAValue args, SSAValue thread_data);
private: private:
void Loop(DrawColumnVariant variant, bool isSimpleShade); void Loop(DrawColumnVariant variant, DrawColumnMethod method, bool isSimpleShade);
SSAVec4i ProcessPixel(SSAInt sample_index, SSAVec4i bgcolor, DrawColumnVariant variant, bool isSimpleShade);
SSAInt ColormapSample(SSAInt frac); SSAInt ColormapSample(SSAInt frac);
SSAInt TranslateSample(SSAInt frac); SSAInt TranslateSample(SSAInt frac);
SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade); SSAVec4i Shade(SSAInt palIndex, bool isSimpleShade);

View file

@ -25,6 +25,7 @@ public:
SSAInt num_cores; SSAInt num_cores;
SSAInt pass_start_y; SSAInt pass_start_y;
SSAInt pass_end_y; SSAInt pass_end_y;
SSAUBytePtr temp;
}; };
class SSAShadeConstants class SSAShadeConstants

View file

@ -47,7 +47,7 @@ public:
LLVMDrawersImpl(); LLVMDrawersImpl();
private: private:
void CodegenDrawColumn(const char *name, DrawColumnVariant variant); void CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method);
void CodegenDrawSpan(const char *name, DrawSpanVariant variant); void CodegenDrawSpan(const char *name, DrawSpanVariant variant);
void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns); void CodegenDrawWall(const char *name, DrawWallVariant variant, int columns);
@ -84,22 +84,36 @@ LLVMDrawers *LLVMDrawers::Instance()
LLVMDrawersImpl::LLVMDrawersImpl() LLVMDrawersImpl::LLVMDrawersImpl()
{ {
CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill); CodegenDrawColumn("FillColumn", DrawColumnVariant::Fill, DrawColumnMethod::Normal);
CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd); CodegenDrawColumn("FillColumnAdd", DrawColumnVariant::FillAdd, DrawColumnMethod::Normal);
CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp); CodegenDrawColumn("FillColumnAddClamp", DrawColumnVariant::FillAddClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp); CodegenDrawColumn("FillColumnSubClamp", DrawColumnVariant::FillSubClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp); CodegenDrawColumn("FillColumnRevSubClamp", DrawColumnVariant::FillRevSubClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw); CodegenDrawColumn("DrawColumn", DrawColumnVariant::Draw, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd); CodegenDrawColumn("DrawColumnAdd", DrawColumnVariant::DrawAdd, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated); CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd); CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnShaded", DrawColumnVariant::DrawShaded); CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnAddClamp", DrawColumnVariant::DrawAddClamp); CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated); CodegenDrawColumn("DrawColumnTranslated", DrawColumnVariant::DrawTranslated, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnSubClamp", DrawColumnVariant::DrawSubClamp); CodegenDrawColumn("DrawColumnTlatedAdd", DrawColumnVariant::DrawTlatedAdd, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated); CodegenDrawColumn("DrawColumnAddClampTranslated", DrawColumnVariant::DrawAddClampTranslated, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnRevSubClamp", DrawColumnVariant::DrawRevSubClamp); CodegenDrawColumn("DrawColumnSubClampTranslated", DrawColumnVariant::DrawSubClampTranslated, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated); CodegenDrawColumn("DrawColumnRevSubClampTranslated", DrawColumnVariant::DrawRevSubClampTranslated, DrawColumnMethod::Normal);
CodegenDrawColumn("DrawColumnRt1", DrawColumnVariant::Draw, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt1RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt1);
CodegenDrawColumn("DrawColumnRt4", DrawColumnVariant::Draw, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Copy", DrawColumnVariant::DrawCopy, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Add", DrawColumnVariant::DrawAdd, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4Shaded", DrawColumnVariant::DrawShaded, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4AddClamp", DrawColumnVariant::DrawAddClamp, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4SubClamp", DrawColumnVariant::DrawSubClamp, DrawColumnMethod::Rt4);
CodegenDrawColumn("DrawColumnRt4RevSubClamp", DrawColumnVariant::DrawRevSubClamp, DrawColumnMethod::Rt4);
CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque); CodegenDrawSpan("DrawSpan", DrawSpanVariant::Opaque);
CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked); CodegenDrawSpan("DrawSpanMasked", DrawSpanVariant::Masked);
CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent); CodegenDrawSpan("DrawSpanTranslucent", DrawSpanVariant::Translucent);
@ -129,15 +143,29 @@ LLVMDrawersImpl::LLVMDrawersImpl()
FillColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnRevSubClamp"); FillColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("FillColumnRevSubClamp");
DrawColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumn"); DrawColumn = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumn");
DrawColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAdd"); DrawColumnAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAdd");
DrawColumnTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTranslated");
DrawColumnTlatedAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTlatedAdd");
DrawColumnShaded = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnShaded"); DrawColumnShaded = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnShaded");
DrawColumnAddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClamp"); DrawColumnAddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClamp");
DrawColumnAddClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClampTranslated");
DrawColumnSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClamp"); DrawColumnSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClamp");
DrawColumnSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClampTranslated");
DrawColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClamp"); DrawColumnRevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClamp");
DrawColumnTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTranslated");
DrawColumnTlatedAdd = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnTlatedAdd");
DrawColumnAddClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnAddClampTranslated");
DrawColumnSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnSubClampTranslated");
DrawColumnRevSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClampTranslated"); DrawColumnRevSubClampTranslated = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRevSubClampTranslated");
DrawColumnRt1 = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1");
DrawColumnRt1Copy = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1Copy");
DrawColumnRt1Add = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1Add");
DrawColumnRt1Shaded = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1Shaded");
DrawColumnRt1AddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1AddClamp");
DrawColumnRt1SubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1SubClamp");
DrawColumnRt1RevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt1RevSubClamp");
DrawColumnRt4 = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4");
DrawColumnRt4Copy = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Copy");
DrawColumnRt4Add = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Add");
DrawColumnRt4Shaded = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4Shaded");
DrawColumnRt4AddClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4AddClamp");
DrawColumnRt4SubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4SubClamp");
DrawColumnRt4RevSubClamp = mProgram.GetProcAddress<void(const DrawColumnArgs *, const WorkerThreadData *)>("DrawColumnRt4RevSubClamp");
DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan"); DrawSpan = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpan");
DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked"); DrawSpanMasked = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanMasked");
DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent"); DrawSpanTranslucent = mProgram.GetProcAddress<void(const DrawSpanArgs *)>("DrawSpanTranslucent");
@ -160,7 +188,7 @@ LLVMDrawersImpl::LLVMDrawersImpl()
mProgram.StopLogFatalErrors(); mProgram.StopLogFatalErrors();
} }
void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant) void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant variant, DrawColumnMethod method)
{ {
llvm::IRBuilder<> builder(mProgram.context()); llvm::IRBuilder<> builder(mProgram.context());
SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder); SSAScope ssa_scope(&mProgram.context(), mProgram.module(), &builder);
@ -171,7 +199,7 @@ void LLVMDrawersImpl::CodegenDrawColumn(const char *name, DrawColumnVariant vari
function.create_public(); function.create_public();
DrawColumnCodegen codegen; DrawColumnCodegen codegen;
codegen.Generate(variant, function.parameter(0), function.parameter(1)); codegen.Generate(variant, method, function.parameter(0), function.parameter(1));
builder.CreateRetVoid(); builder.CreateRetVoid();
@ -310,6 +338,7 @@ llvm::Type *LLVMDrawersImpl::GetWorkerThreadDataStruct(llvm::LLVMContext &contex
std::vector<llvm::Type *> elements; std::vector<llvm::Type *> elements;
for (int i = 0; i < 4; i++) for (int i = 0; i < 4; i++)
elements.push_back(llvm::Type::getInt32Ty(context)); elements.push_back(llvm::Type::getInt32Ty(context));
elements.push_back(llvm::Type::getInt8PtrTy(context));
return llvm::StructType::get(context, elements, false)->getPointerTo(); return llvm::StructType::get(context, elements, false)->getPointerTo();
} }

View file

@ -7,6 +7,7 @@ struct WorkerThreadData
int32_t num_cores; int32_t num_cores;
int32_t pass_start_y; int32_t pass_start_y;
int32_t pass_end_y; int32_t pass_end_y;
uint32_t *temp;
}; };
struct DrawWallArgs struct DrawWallArgs
@ -122,20 +123,34 @@ public:
void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnShaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnTlatedAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnAddClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*DrawColumnRevSubClampTranslated)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumn)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAdd)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnAddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr; void(*FillColumnRevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt1RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Copy)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Add)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4Shaded)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4AddClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4SubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawColumnRt4RevSubClamp)(const DrawColumnArgs *, const WorkerThreadData *) = nullptr;
void(*DrawSpan)(const DrawSpanArgs *) = nullptr; void(*DrawSpan)(const DrawSpanArgs *) = nullptr;
void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr; void(*DrawSpanMasked)(const DrawSpanArgs *) = nullptr;

View file

@ -334,6 +334,7 @@ public:
class name##LLVMCommand : public base \ class name##LLVMCommand : public base \
{ \ { \
public: \ public: \
using base::base; \
void Execute(DrawerThread *thread) override \ void Execute(DrawerThread *thread) override \
{ \ { \
WorkerThreadData d = ThreadData(thread); \ WorkerThreadData d = ThreadData(thread); \

View file

@ -43,6 +43,7 @@
#include "r_things.h" #include "r_things.h"
#include "v_video.h" #include "v_video.h"
#include "r_draw_rgba.h" #include "r_draw_rgba.h"
#include "r_compiler/llvmdrawers.h"
#ifndef NO_SSE #ifndef NO_SSE
#include <emmintrin.h> #include <emmintrin.h>
#endif #endif
@ -89,6 +90,89 @@ extern unsigned int *horizspan[4];
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
class DrawColumnRt1LLVMCommand : public DrawerCommand
{
protected:
DrawColumnArgs args;
WorkerThreadData ThreadData(DrawerThread *thread)
{
WorkerThreadData d;
d.core = thread->core;
d.num_cores = thread->num_cores;
d.pass_start_y = thread->pass_start_y;
d.pass_end_y = thread->pass_end_y;
d.temp = thread->dc_temp_rgba;
return d;
}
public:
DrawColumnRt1LLVMCommand(int hx, int sx, int yl, int yh)
{
args.dest = (uint32_t*)dc_destorg + ylookup[yl] + sx;
args.source = nullptr;
args.colormap = dc_colormap;
args.translation = dc_translation;
args.basecolors = (const uint32_t *)GPalette.BaseColors;
args.pitch = dc_pitch;
args.count = yh - yl + 1;
args.dest_y = yl;
args.iscale = dc_iscale;
args.texturefrac = hx;
args.light = LightBgra::calc_light_multiplier(dc_light);
args.color = LightBgra::shade_pal_index_simple(dc_color, args.light);
args.srccolor = dc_srccolor_bgra;
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.light_red = dc_shade_constants.light_red;
args.light_green = dc_shade_constants.light_green;
args.light_blue = dc_shade_constants.light_blue;
args.light_alpha = dc_shade_constants.light_alpha;
args.fade_red = dc_shade_constants.fade_red;
args.fade_green = dc_shade_constants.fade_green;
args.fade_blue = dc_shade_constants.fade_blue;
args.fade_alpha = dc_shade_constants.fade_alpha;
args.desaturate = dc_shade_constants.desaturate;
args.flags = 0;
if (dc_shade_constants.simple_shade)
args.flags |= DrawColumnArgs::simple_shade;
}
void Execute(DrawerThread *thread) override
{
WorkerThreadData d = ThreadData(thread);
LLVMDrawers::Instance()->DrawColumnRt1(&args, &d);
}
};
// DECLARE_DRAW_COMMAND(name, func, base)
// Declares class `<name>LLVMCommand` deriving from `base` and inheriting
// base's constructors. Its Execute() builds a WorkerThreadData with
// ThreadData(thread) and calls LLVMDrawers::Instance()->func(&args, &d),
// so `base` must provide an accessible `args` member and ThreadData().
#define DECLARE_DRAW_COMMAND(name, func, base) \
class name##LLVMCommand : public base \
{ \
public: \
using base::base; \
void Execute(DrawerThread *thread) override \
{ \
WorkerThreadData d = ThreadData(thread); \
LLVMDrawers::Instance()->func(&args, &d); \
} \
};
// Remaining Rt1 commands and all Rt4 commands. Every one reuses
// DrawColumnRt1LLVMCommand for argument capture (including the Rt4 ones) and
// differs only in which LLVMDrawers entry point its Execute() calls.
DECLARE_DRAW_COMMAND(DrawColumnRt1Copy, DrawColumnRt1Copy, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1Add, DrawColumnRt1Add, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1Shaded, DrawColumnRt1Shaded, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1AddClamp, DrawColumnRt1AddClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1SubClamp, DrawColumnRt1SubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt1RevSubClamp, DrawColumnRt1RevSubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4, DrawColumnRt4, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Copy, DrawColumnRt4Copy, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Add, DrawColumnRt4Add, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4Shaded, DrawColumnRt4Shaded, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4AddClamp, DrawColumnRt4AddClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4SubClamp, DrawColumnRt4SubClamp, DrawColumnRt1LLVMCommand);
DECLARE_DRAW_COMMAND(DrawColumnRt4RevSubClamp, DrawColumnRt4RevSubClamp, DrawColumnRt1LLVMCommand);
/////////////////////////////////////////////////////////////////////////////
class DrawerRt1colCommand : public DrawerCommand class DrawerRt1colCommand : public DrawerCommand
{ {
public: public:
@ -756,7 +840,7 @@ public:
// Copies one span at hx to the screen at sx. // Copies one span at hx to the screen at sx.
void rt_copy1col_rgba (int hx, int sx, int yl, int yh) void rt_copy1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtCopy1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1CopyLLVMCommand>(hx, sx, yl, yh);
} }
// Copies all four spans to the screen starting at sx. // Copies all four spans to the screen starting at sx.
@ -772,17 +856,13 @@ void rt_copy4cols_rgba (int sx, int yl, int yh)
// Maps one span at hx to the screen at sx. // Maps one span at hx to the screen at sx.
void rt_map1col_rgba (int hx, int sx, int yl, int yh) void rt_map1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtMap1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1LLVMCommand>(hx, sx, yl, yh);
} }
// Maps all four spans to the screen starting at sx. // Maps all four spans to the screen starting at sx.
void rt_map4cols_rgba (int sx, int yl, int yh) void rt_map4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4LLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtMap4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtMap4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh) void rt_Translate1col_rgba(const BYTE *translation, int hx, int yl, int yh)
@ -812,17 +892,13 @@ void rt_tlate4cols_rgba (int sx, int yl, int yh)
// Adds one span at hx to the screen at sx without clamping. // Adds one span at hx to the screen at sx without clamping.
void rt_add1col_rgba (int hx, int sx, int yl, int yh) void rt_add1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtAdd1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1AddLLVMCommand>(hx, sx, yl, yh);
} }
// Adds all four spans to the screen starting at sx without clamping. // Adds all four spans to the screen starting at sx without clamping.
void rt_add4cols_rgba (int sx, int yl, int yh) void rt_add4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4AddLLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAdd4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
// Translates and adds one span at hx to the screen at sx without clamping. // Translates and adds one span at hx to the screen at sx without clamping.
@ -842,33 +918,25 @@ void rt_tlateadd4cols_rgba(int sx, int yl, int yh)
// Shades one span at hx to the screen at sx. // Shades one span at hx to the screen at sx.
void rt_shaded1col_rgba (int hx, int sx, int yl, int yh) void rt_shaded1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtShaded1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1ShadedLLVMCommand>(hx, sx, yl, yh);
} }
// Shades all four spans to the screen starting at sx. // Shades all four spans to the screen starting at sx.
void rt_shaded4cols_rgba (int sx, int yl, int yh) void rt_shaded4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4ShadedLLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtShaded4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
// Adds one span at hx to the screen at sx with clamping. // Adds one span at hx to the screen at sx with clamping.
void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh) void rt_addclamp1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtAddClamp1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1AddClampLLVMCommand>(hx, sx, yl, yh);
} }
// Adds all four spans to the screen starting at sx with clamping. // Adds all four spans to the screen starting at sx with clamping.
void rt_addclamp4cols_rgba (int sx, int yl, int yh) void rt_addclamp4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4AddClampLLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtAddClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
// Translates and adds one span at hx to the screen at sx with clamping. // Translates and adds one span at hx to the screen at sx with clamping.
@ -888,17 +956,13 @@ void rt_tlateaddclamp4cols_rgba (int sx, int yl, int yh)
// Subtracts one span at hx to the screen at sx with clamping. // Subtracts one span at hx to the screen at sx with clamping.
void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh) void rt_subclamp1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtSubClamp1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1SubClampLLVMCommand>(hx, sx, yl, yh);
} }
// Subtracts all four spans to the screen starting at sx with clamping. // Subtracts all four spans to the screen starting at sx with clamping.
void rt_subclamp4cols_rgba (int sx, int yl, int yh) void rt_subclamp4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4SubClampLLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
// Translates and subtracts one span at hx to the screen at sx with clamping. // Translates and subtracts one span at hx to the screen at sx with clamping.
@ -918,17 +982,13 @@ void rt_tlatesubclamp4cols_rgba (int sx, int yl, int yh)
// Subtracts one span at hx from the screen at sx with clamping. // Subtracts one span at hx from the screen at sx with clamping.
void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh) void rt_revsubclamp1col_rgba (int hx, int sx, int yl, int yh)
{ {
DrawerCommandQueue::QueueCommand<RtRevSubClamp1colRGBACommand>(hx, sx, yl, yh); DrawerCommandQueue::QueueCommand<DrawColumnRt1RevSubClampLLVMCommand>(hx, sx, yl, yh);
} }
// Subtracts all four spans from the screen starting at sx with clamping. // Subtracts all four spans from the screen starting at sx with clamping.
void rt_revsubclamp4cols_rgba (int sx, int yl, int yh) void rt_revsubclamp4cols_rgba (int sx, int yl, int yh)
{ {
#ifdef NO_SSE DrawerCommandQueue::QueueCommand<DrawColumnRt4SubClampLLVMCommand>(0, sx, yl, yh);
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBACommand>(sx, yl, yh);
#else
DrawerCommandQueue::QueueCommand<RtRevSubClamp4colsRGBA_SSE_Command>(sx, yl, yh);
#endif
} }
// Translates and subtracts one span at hx from the screen at sx with clamping. // Translates and subtracts one span at hx from the screen at sx with clamping.