Fully implemented codegen for DrawSpan

This commit is contained in:
Magnus Norddahl 2016-09-28 18:49:39 +02:00
parent 576fed5afc
commit 3aea3a0bee
7 changed files with 337 additions and 1225 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,6 +1,7 @@
#pragma once
#include "r_compiler/ssa/ssa_value.h"
#include "r_compiler/ssa/ssa_vec4f.h"
#include "r_compiler/ssa/ssa_vec4i.h"
#include "r_compiler/ssa/ssa_vec8s.h"
@ -84,16 +85,9 @@ public:
SSAInt desaturate;
};
class FixedFunction
class DrawerCodegen
{
public:
FixedFunction();
void(*DrawSpan)(const RenderArgs *) = nullptr;
private:
void CodegenDrawSpan();
// LightBgra
SSAInt calc_light_multiplier(SSAInt light);
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
@ -111,89 +105,57 @@ private:
// SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
};
// Code generator for the span drawer. Derives from DrawerCodegen, so the
// helpers declared on that base class are available to the methods here.
class DrawSpanCodegen : public DrawerCodegen
{
public:
// Entry point of the generator. `args` is an SSA value; presumably it refers
// to the RenderArgs structure handed to the generated function - confirm
// against the implementation, which is not visible here.
void Generate(SSAValue args);
private:
// Internal helpers. Their parameters are plain C++ bools whereas the matching
// members below (is_simple_shade, is_nearest_filter, is_64x64) are SSABool
// values. NOTE(review): this looks like each helper emits one variant of the
// loop per flag combination - verify in the .cpp.
void LoopShade(bool isSimpleShade);
void LoopFilter(bool isSimpleShade, bool isNearestFilter);
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64);
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
// SSA stack variables; presumably the loop-carried pixel index and texture
// coordinates - TODO confirm.
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;
// The following names match RenderArgs fields filled in by the span draw
// command (destorg, source, destpitch, xstep, ystep, x1, x2, y, xbits, ybits,
// light, srcalpha, destalpha). Presumably they are loaded from `args` in
// Generate() - confirm.
SSAUBytePtr destorg;
SSAUBytePtr source;
SSAInt destpitch;
SSAInt xstep;
SSAInt ystep;
SSAInt x1;
SSAInt x2;
SSAInt y;
SSAInt xbits;
SSAInt ybits;
SSAInt light;
SSAInt srcalpha;
SSAInt destalpha;
// These have no same-named RenderArgs field in the visible code; how they are
// computed is not shown in this header.
SSAInt count;
SSAUBytePtr data;
SSAInt yshift;
SSAInt xshift;
SSAInt xmask;
// SSABool counterparts of the bool parameters taken by the helpers above.
SSABool is_64x64;
SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants;
};
// Holds a RenderProgram and exposes the span drawer as a plain function
// pointer taking a RenderArgs pointer.
class FixedFunction
{
public:
FixedFunction();
// Null until assigned. NOTE(review): presumably set by the constructor via
// CodegenDrawSpan() - the definition is not visible here, so confirm before
// relying on it being non-null.
void(*DrawSpan)(const RenderArgs *) = nullptr;
private:
void CodegenDrawSpan();
// Returns an LLVM type for the given context; by its name this is the LLVM
// struct matching RenderArgs - TODO confirm the field order matches.
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
RenderProgram mProgram;
};
// Everything between this #if 0 and the matching #endif is excluded from
// compilation. Only declarations are present in this header.
#if 0
class GlslProgram;
class GlslCodeGen;
// Declares code-generation entry points for a GLSL-style pipeline; holds
// references to a program and to separate vertex and fragment code generators.
class GlslFixedFunction
{
public:
GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen);
void codegen();
static llvm::Type *get_sampler_struct(llvm::LLVMContext &context);
private:
// One codegen_* method per generated routine; bodies are not visible here.
void codegen_draw_triangles(int num_vertex_in, int num_vertex_out);
void codegen_calc_window_positions();
void codegen_calc_polygon_face_direction();
void codegen_calc_polygon_y_range();
void codegen_update_polygon_edge();
void codegen_texture();
void codegen_normalize();
void codegen_reflect();
void codegen_max();
void codegen_pow();
void codegen_dot();
void codegen_mix();
// Bundle of values passed by reference to process_first_pixels,
// process_last_pixels and inner_block.
struct OuterData
{
// Value-initializes `sampler` (null pointer). `num_varyings` is a plain int
// that this constructor does not initialize.
OuterData() : sampler() { }
SSAInt start;
SSAInt end;
SSAInt input_width;
SSAInt input_height;
SSAInt output_width;
SSAInt output_height;
SSAUBytePtr input_pixels;
SSAUBytePtr output_pixels_line;
SSAVec4fPtr sse_left_varying_in;
SSAVec4fPtr sse_right_varying_in;
int num_varyings;
SSAVec4f viewport_x;
SSAVec4f viewport_rcp_half_width;
SSAVec4f dx;
SSAVec4f dw;
SSAVec4f v1w;
SSAVec4f v1x;
llvm::Value *sampler;
};
// NOTE(review): `core` / `num_cores` suggest the work is divided between
// cores - confirm against the implementation.
void render_polygon(
SSAInt input_width,
SSAInt input_height,
SSAUBytePtr input_data,
SSAInt output_width,
SSAInt output_height,
SSAUBytePtr output_data,
SSAInt viewport_x,
SSAInt viewport_y,
SSAInt viewport_width,
SSAInt viewport_height,
SSAInt num_vertices,
std::vector<SSAVec4fPtr> fragment_ins,
SSAInt core,
SSAInt num_cores);
void codegen_render_scanline(int num_varyings);
void process_first_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void process_last_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors);
void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest);
// References bound from the constructor arguments of the same names.
GlslProgram &program;
GlslCodeGen &vertex_codegen;
GlslCodeGen &fragment_codegen;
};
#endif

View File

@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type()
return llvm::Type::getInt16Ty(SSAScope::context());
}
// Emits a zero-extension of this value to the SSAInt LLVM type and wraps the
// resulting instruction as an SSAInt.
SSAInt SSAShort::zext_int()
{
	llvm::Value *widened = SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint());
	return SSAInt::from_llvm(widened);
}
SSAShort operator+(const SSAShort &a, const SSAShort &b)
{
return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View File

@ -17,6 +17,8 @@ public:
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
static llvm::Type *llvm_type();
SSAInt zext_int();
llvm::Value *v;
};

View File

@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i)
v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
}
// Builds a four-lane vector from four separate SSAInt values: starts from an
// all-zero constant vector of 32-bit integers and inserts i0..i3 into lanes
// 0..3 in that order.
SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
: v(nullptr)
{
	llvm::Constant *zero = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true));
	std::vector<llvm::Constant*> zero_lanes(4, zero);
	v = llvm::ConstantVector::get(zero_lanes);

	llvm::Value *elements[4] = { i0.v, i1.v, i2.v, i3.v };
	for (uint64_t lane = 0; lane < 4; lane++)
	{
		v = SSAScope::builder().CreateInsertElement(v, elements[lane], lane, SSAScope::hint());
	}
}
SSAVec4i::SSAVec4i(SSAVec4f f32)
: v(0)
{

View File

@ -16,6 +16,7 @@ public:
SSAVec4i(int constant);
SSAVec4i(int constant0, int constant1, int constant2, int constant3);
SSAVec4i(SSAInt i);
SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3);
explicit SSAVec4i(llvm::Value *v);
SSAVec4i(SSAVec4f f32);
SSAInt operator[](SSAInt index);

View File

@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads()
/////////////////////////////////////////////////////////////////////////////
class DrawSpanFFCommand : public DrawerCommand
class DrawSpanLLVMCommand : public DrawerCommand
{
fixed_t _xfrac;
fixed_t _yfrac;
fixed_t _xstep;
fixed_t _ystep;
int _x1;
int _x2;
int _y;
int _xbits;
int _ybits;
BYTE * RESTRICT _destorg;
const uint32_t * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
bool _nearest_filter;
uint32_t _srcalpha;
uint32_t _destalpha;
RenderArgs args;
FixedFunction *_ff;
public:
DrawSpanFFCommand()
DrawSpanLLVMCommand()
{
_xfrac = ds_xfrac;
_yfrac = ds_yfrac;
_xstep = ds_xstep;
_ystep = ds_ystep;
_x1 = ds_x1;
_x2 = ds_x2;
_y = ds_y;
_xbits = ds_xbits;
_ybits = ds_ybits;
_destorg = dc_destorg;
_source = (const uint32_t*)ds_source;
_light = LightBgra::calc_light_multiplier(ds_light);
_shade_constants = ds_shade_constants;
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
_destalpha = dc_destalpha >> (FRACBITS - 8);
args.xfrac = ds_xfrac;
args.yfrac = ds_yfrac;
args.xstep = ds_xstep;
args.ystep = ds_ystep;
args.x1 = ds_x1;
args.x2 = ds_x2;
args.y = ds_y;
args.xbits = ds_xbits;
args.ybits = ds_ybits;
args.destorg = (uint32_t*)dc_destorg;
args.destpitch = dc_pitch;
args.source = (const uint32_t*)ds_source;
args.light = LightBgra::calc_light_multiplier(ds_light);
args.light_red = ds_shade_constants.light_red;
args.light_green = ds_shade_constants.light_green;
args.light_blue = ds_shade_constants.light_blue;
args.light_alpha = ds_shade_constants.light_alpha;
args.fade_red = ds_shade_constants.fade_red;
args.fade_green = ds_shade_constants.fade_green;
args.fade_blue = ds_shade_constants.fade_blue;
args.fade_alpha = ds_shade_constants.fade_alpha;
args.desaturate = ds_shade_constants.desaturate;
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.flags = 0;
if (ds_shade_constants.simple_shade)
args.flags |= RenderArgs::simple_shade;
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
args.flags |= RenderArgs::nearest_filter;
static FixedFunction ff;
_ff = &ff;
@ -351,25 +344,8 @@ public:
void Execute(DrawerThread *thread) override
{
if (thread->skipped_by_thread(_y))
if (thread->skipped_by_thread(args.y))
return;
RenderArgs args;
args.destorg = (uint32_t *)_destorg;
args.source = _source;
args.destpitch = dc_pitch;
args.xfrac = _xfrac;
args.yfrac = _yfrac;
args.xstep = _xstep;
args.ystep = _ystep;
args.x1 = _x1;
args.x2 = _x2;
args.y = _y;
args.xbits = _xbits;
args.ybits = _ybits;
args.light = _light;
args.srcalpha = _srcalpha;
args.destalpha = _destalpha;
_ff->DrawSpan(&args);
}
};
@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
void R_DrawSpan_rgba()
{
DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>();
DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
/*
#ifdef NO_SSE
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();