Fully implemented codegen for DrawSpan

This commit is contained in:
Magnus Norddahl 2016-09-28 18:49:39 +02:00
parent 576fed5afc
commit 3aea3a0bee
7 changed files with 337 additions and 1225 deletions

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,7 @@
#pragma once #pragma once
#include "r_compiler/ssa/ssa_value.h"
#include "r_compiler/ssa/ssa_vec4f.h" #include "r_compiler/ssa/ssa_vec4f.h"
#include "r_compiler/ssa/ssa_vec4i.h" #include "r_compiler/ssa/ssa_vec4i.h"
#include "r_compiler/ssa/ssa_vec8s.h" #include "r_compiler/ssa/ssa_vec8s.h"
@ -84,16 +85,9 @@ public:
SSAInt desaturate; SSAInt desaturate;
}; };
class FixedFunction class DrawerCodegen
{ {
public: public:
FixedFunction();
void(*DrawSpan)(const RenderArgs *) = nullptr;
private:
void CodegenDrawSpan();
// LightBgra // LightBgra
SSAInt calc_light_multiplier(SSAInt light); SSAInt calc_light_multiplier(SSAInt light);
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors); SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
@ -111,89 +105,57 @@ private:
// SampleBgra // SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height); SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits); SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
};
// Code generator for the span drawer. Derives from DrawerCodegen and declares a
// single public entry point plus private helpers and the SSA values they share.
class DrawSpanCodegen : public DrawerCodegen
{
public:
// Entry point: emits the drawer code for the given argument value.
// NOTE(review): `args` presumably refers to a RenderArgs struct - confirm against the implementation.
void Generate(SSAValue args);
private:
// The helpers below take plain C++ bools, not SSABool values. Each overload adds one more
// flag (shade mode, then filter mode, then the 64x64 case).
// NOTE(review): presumably each level branches on the matching SSABool member and calls the next
// helper with a constant flag - confirm against the implementation.
void LoopShade(bool isSimpleShade);
void LoopFilter(bool isSimpleShade, bool isNearestFilter);
// Returns an SSAInt; NOTE(review): presumably the index at which Loop() should continue - confirm.
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64);
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64);
// Produces one sampled pixel value for the given texture coordinates.
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
// Stack-backed SSA variables for the loop index and the running texture coordinates.
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;
// The names below match fields of the drawer argument struct (destorg, source, destpitch, ...).
// NOTE(review): presumably loaded from `args` in Generate() - confirm.
SSAUBytePtr destorg;
SSAUBytePtr source;
SSAInt destpitch;
SSAInt xstep;
SSAInt ystep;
SSAInt x1;
SSAInt x2;
SSAInt y;
SSAInt xbits;
SSAInt ybits;
SSAInt light;
SSAInt srcalpha;
SSAInt destalpha;
// NOTE(review): the remaining values have no same-named argument field; presumably derived
// from the ones above (pixel count, destination pointer, shift/mask values) - confirm.
SSAInt count;
SSAUBytePtr data;
SSAInt yshift;
SSAInt xshift;
SSAInt xmask;
// Runtime (SSA) counterparts of the bool flags passed to the Loop* helpers.
SSABool is_64x64;
SSABool is_simple_shade;
SSABool is_nearest_filter;
SSAShadeConstants shade_constants;
};
class FixedFunction
{
public:
FixedFunction();
void(*DrawSpan)(const RenderArgs *) = nullptr;
private:
void CodegenDrawSpan();
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context); static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
RenderProgram mProgram; RenderProgram mProgram;
}; };
// Everything between this #if 0 and the matching #endif is removed by the preprocessor,
// so GlslFixedFunction is declared here for reference only and is never compiled.
#if 0
class GlslProgram;
class GlslCodeGen;
// Declaration of a fixed-function code generator tied to a GlslProgram and two GlslCodeGen
// objects (vertex and fragment), all held by reference.
class GlslFixedFunction
{
public:
GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen);
void codegen();
static llvm::Type *get_sampler_struct(llvm::LLVMContext &context);
private:
// One codegen_* helper per generated routine.
void codegen_draw_triangles(int num_vertex_in, int num_vertex_out);
void codegen_calc_window_positions();
void codegen_calc_polygon_face_direction();
void codegen_calc_polygon_y_range();
void codegen_update_polygon_edge();
void codegen_texture();
void codegen_normalize();
void codegen_reflect();
void codegen_max();
void codegen_pow();
void codegen_dot();
void codegen_mix();
// Bundle of values handed to process_first_pixels, process_last_pixels and inner_block.
struct OuterData
{
// Only `sampler` is initialised explicitly (value-initialised, i.e. null).
OuterData() : sampler() { }
SSAInt start;
SSAInt end;
SSAInt input_width;
SSAInt input_height;
SSAInt output_width;
SSAInt output_height;
SSAUBytePtr input_pixels;
SSAUBytePtr output_pixels_line;
SSAVec4fPtr sse_left_varying_in;
SSAVec4fPtr sse_right_varying_in;
// Plain C++ int, unlike the SSA values around it.
int num_varyings;
SSAVec4f viewport_x;
SSAVec4f viewport_rcp_half_width;
SSAVec4f dx;
SSAVec4f dw;
SSAVec4f v1w;
SSAVec4f v1x;
llvm::Value *sampler;
};
void render_polygon(
SSAInt input_width,
SSAInt input_height,
SSAUBytePtr input_data,
SSAInt output_width,
SSAInt output_height,
SSAUBytePtr output_data,
SSAInt viewport_x,
SSAInt viewport_y,
SSAInt viewport_width,
SSAInt viewport_height,
SSAInt num_vertices,
std::vector<SSAVec4fPtr> fragment_ins,
SSAInt core,
SSAInt num_cores);
void codegen_render_scanline(int num_varyings);
void process_first_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void process_last_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors);
void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest);
// Reference members: the referenced objects must outlive this instance.
GlslProgram &program;
GlslCodeGen &vertex_codegen;
GlslCodeGen &fragment_codegen;
};
#endif

View file

@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type()
return llvm::Type::getInt16Ty(SSAScope::context()); return llvm::Type::getInt16Ty(SSAScope::context());
} }
// Widens this value to an SSAInt by emitting a zero-extend (zext) instruction
// through the current scope's IR builder.
SSAInt SSAShort::zext_int()
{
	llvm::Type *target_type = SSAInt::llvm_type();
	llvm::Value *widened = SSAScope::builder().CreateZExt(v, target_type, SSAScope::hint());
	return SSAInt::from_llvm(widened);
}
SSAShort operator+(const SSAShort &a, const SSAShort &b) SSAShort operator+(const SSAShort &a, const SSAShort &b)
{ {
return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint())); return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));

View file

@ -17,6 +17,8 @@ public:
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); } static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
static llvm::Type *llvm_type(); static llvm::Type *llvm_type();
SSAInt zext_int();
llvm::Value *v; llvm::Value *v;
}; };

View file

@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i)
v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint()); v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
} }
// Builds a four-lane integer vector from four separate SSAInt values.
// Starts from an all-zero constant vector and inserts i0..i3 into lanes 0..3 in order.
SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
: v(0)
{
	// Zero-filled constant vector of four 32-bit integers used as the insertion base.
	llvm::Constant *zero_lane = llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true));
	std::vector<llvm::Constant*> zero_lanes(4, zero_lane);
	v = llvm::ConstantVector::get(zero_lanes);

	// One insertelement per lane, emitted in ascending lane order.
	llvm::Value *lane_values[4] = { i0.v, i1.v, i2.v, i3.v };
	for (uint64_t lane = 0; lane < 4; lane++)
		v = SSAScope::builder().CreateInsertElement(v, lane_values[lane], lane, SSAScope::hint());
}
SSAVec4i::SSAVec4i(SSAVec4f f32) SSAVec4i::SSAVec4i(SSAVec4f f32)
: v(0) : v(0)
{ {

View file

@ -16,6 +16,7 @@ public:
SSAVec4i(int constant); SSAVec4i(int constant);
SSAVec4i(int constant0, int constant1, int constant2, int constant3); SSAVec4i(int constant0, int constant1, int constant2, int constant3);
SSAVec4i(SSAInt i); SSAVec4i(SSAInt i);
SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3);
explicit SSAVec4i(llvm::Value *v); explicit SSAVec4i(llvm::Value *v);
SSAVec4i(SSAVec4f f32); SSAVec4i(SSAVec4f f32);
SSAInt operator[](SSAInt index); SSAInt operator[](SSAInt index);

View file

@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads()
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
class DrawSpanFFCommand : public DrawerCommand class DrawSpanLLVMCommand : public DrawerCommand
{ {
fixed_t _xfrac; RenderArgs args;
fixed_t _yfrac;
fixed_t _xstep;
fixed_t _ystep;
int _x1;
int _x2;
int _y;
int _xbits;
int _ybits;
BYTE * RESTRICT _destorg;
const uint32_t * RESTRICT _source;
uint32_t _light;
ShadeConstants _shade_constants;
bool _nearest_filter;
uint32_t _srcalpha;
uint32_t _destalpha;
FixedFunction *_ff; FixedFunction *_ff;
public: public:
DrawSpanFFCommand() DrawSpanLLVMCommand()
{ {
_xfrac = ds_xfrac; args.xfrac = ds_xfrac;
_yfrac = ds_yfrac; args.yfrac = ds_yfrac;
_xstep = ds_xstep; args.xstep = ds_xstep;
_ystep = ds_ystep; args.ystep = ds_ystep;
_x1 = ds_x1; args.x1 = ds_x1;
_x2 = ds_x2; args.x2 = ds_x2;
_y = ds_y; args.y = ds_y;
_xbits = ds_xbits; args.xbits = ds_xbits;
_ybits = ds_ybits; args.ybits = ds_ybits;
_destorg = dc_destorg; args.destorg = (uint32_t*)dc_destorg;
args.destpitch = dc_pitch;
_source = (const uint32_t*)ds_source; args.source = (const uint32_t*)ds_source;
_light = LightBgra::calc_light_multiplier(ds_light); args.light = LightBgra::calc_light_multiplier(ds_light);
_shade_constants = ds_shade_constants; args.light_red = ds_shade_constants.light_red;
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped); args.light_green = ds_shade_constants.light_green;
args.light_blue = ds_shade_constants.light_blue;
_srcalpha = dc_srcalpha >> (FRACBITS - 8); args.light_alpha = ds_shade_constants.light_alpha;
_destalpha = dc_destalpha >> (FRACBITS - 8); args.fade_red = ds_shade_constants.fade_red;
args.fade_green = ds_shade_constants.fade_green;
args.fade_blue = ds_shade_constants.fade_blue;
args.fade_alpha = ds_shade_constants.fade_alpha;
args.desaturate = ds_shade_constants.desaturate;
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
args.destalpha = dc_destalpha >> (FRACBITS - 8);
args.flags = 0;
if (ds_shade_constants.simple_shade)
args.flags |= RenderArgs::simple_shade;
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
args.flags |= RenderArgs::nearest_filter;
static FixedFunction ff; static FixedFunction ff;
_ff = &ff; _ff = &ff;
@ -351,25 +344,8 @@ public:
void Execute(DrawerThread *thread) override void Execute(DrawerThread *thread) override
{ {
if (thread->skipped_by_thread(_y)) if (thread->skipped_by_thread(args.y))
return; return;
RenderArgs args;
args.destorg = (uint32_t *)_destorg;
args.source = _source;
args.destpitch = dc_pitch;
args.xfrac = _xfrac;
args.yfrac = _yfrac;
args.xstep = _xstep;
args.ystep = _ystep;
args.x1 = _x1;
args.x2 = _x2;
args.y = _y;
args.xbits = _xbits;
args.ybits = _ybits;
args.light = _light;
args.srcalpha = _srcalpha;
args.destalpha = _destalpha;
_ff->DrawSpan(&args); _ff->DrawSpan(&args);
} }
}; };
@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
void R_DrawSpan_rgba() void R_DrawSpan_rgba()
{ {
DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>(); DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
/* /*
#ifdef NO_SSE #ifdef NO_SSE
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>(); DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();