mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-11 23:32:02 +00:00
Fully implemented codegen for DrawSpan
This commit is contained in:
parent
576fed5afc
commit
3aea3a0bee
7 changed files with 337 additions and 1225 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,7 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "r_compiler/ssa/ssa_value.h"
|
||||
#include "r_compiler/ssa/ssa_vec4f.h"
|
||||
#include "r_compiler/ssa/ssa_vec4i.h"
|
||||
#include "r_compiler/ssa/ssa_vec8s.h"
|
||||
|
@ -84,16 +85,9 @@ public:
|
|||
SSAInt desaturate;
|
||||
};
|
||||
|
||||
class FixedFunction
|
||||
class DrawerCodegen
|
||||
{
|
||||
public:
|
||||
FixedFunction();
|
||||
|
||||
void(*DrawSpan)(const RenderArgs *) = nullptr;
|
||||
|
||||
private:
|
||||
void CodegenDrawSpan();
|
||||
|
||||
// LightBgra
|
||||
SSAInt calc_light_multiplier(SSAInt light);
|
||||
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
|
||||
|
@ -111,89 +105,57 @@ private:
|
|||
// SampleBgra
|
||||
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
||||
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
|
||||
};
|
||||
|
||||
// Code generator for the span drawer, derived from DrawerCodegen.
// Generate() is the single public entry point; everything else is private
// helper declarations and SSA-typed state used while emitting the code.
// NOTE(review): the implementation is not visible in this listing, so the
// per-member notes below describe only what the declarations show.
class DrawSpanCodegen : public DrawerCodegen
|
||||
{
|
||||
public:
|
||||
// Entry point: takes the drawer arguments as a single SSAValue.
void Generate(SSAValue args);
|
||||
|
||||
private:
|
||||
// Loop helpers. Each one takes host-side bool flags (shade mode, filter mode,
// 64x64 texture); presumably each level fixes one more flag before emitting
// the loop body - TODO confirm against the .cpp.
void LoopShade(bool isSimpleShade);
|
||||
void LoopFilter(bool isSimpleShade, bool isNearestFilter);
|
||||
// Returns an SSAInt; presumably the index at which the remaining Loop() should start - verify.
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||
// Produces one SSAVec4i sample for the given texture fractions.
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
|
||||
|
||||
// SSA stack slots for the loop index and the running x/y texture fractions.
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;
|
||||
|
||||
// SSA values named after the RenderArgs fields (destorg, source, destpitch,
// xstep, ystep, x1, x2, y, xbits, ybits, light, srcalpha, destalpha).
// NOTE(review): presumably loaded from `args` in Generate() - confirm.
SSAUBytePtr destorg;
|
||||
SSAUBytePtr source;
|
||||
SSAInt destpitch;
|
||||
SSAInt xstep;
|
||||
SSAInt ystep;
|
||||
SSAInt x1;
|
||||
SSAInt x2;
|
||||
SSAInt y;
|
||||
SSAInt xbits;
|
||||
SSAInt ybits;
|
||||
SSAInt light;
|
||||
SSAInt srcalpha;
|
||||
SSAInt destalpha;
|
||||
// Values with no same-named RenderArgs field in this listing; presumably
// derived from the fields above - TODO confirm how each is computed.
SSAInt count;
|
||||
SSAUBytePtr data;
|
||||
SSAInt yshift;
|
||||
SSAInt xshift;
|
||||
SSAInt xmask;
|
||||
// SSABool counterparts of the bool flags passed to the loop helpers.
SSABool is_64x64;
|
||||
SSABool is_simple_shade;
|
||||
SSABool is_nearest_filter;
|
||||
SSAShadeConstants shade_constants;
|
||||
};
|
||||
|
||||
// Owner of the generated drawer entry points. Exposes DrawSpan as a plain
// function pointer and keeps a RenderProgram member.
// NOTE(review): presumably the constructor runs CodegenDrawSpan() and fills in
// DrawSpan from mProgram - the definitions are not visible here, confirm.
class FixedFunction
|
||||
{
|
||||
public:
|
||||
FixedFunction();
|
||||
|
||||
// Span drawer entry point taking a const RenderArgs*; default-initialized to nullptr.
void(*DrawSpan)(const RenderArgs *) = nullptr;
|
||||
|
||||
private:
|
||||
void CodegenDrawSpan();
|
||||
|
||||
// Returns an llvm::Type* for the given LLVM context; by its name, the LLVM-side
// description of RenderArgs - verify the field order matches the C++ struct.
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
|
||||
|
||||
RenderProgram mProgram;
|
||||
};
|
||||
|
||||
#if 0
|
||||
|
||||
class GlslProgram;
|
||||
class GlslCodeGen;
|
||||
|
||||
// Declaration of GlslFixedFunction. In this listing it sits inside an
// `#if 0` ... `#endif` region, so it is excluded from compilation.
// The class holds references to a GlslProgram and two GlslCodeGen objects
// (vertex and fragment) that are supplied to the constructor.
// NOTE(review): no definitions are visible here; the notes below describe the
// declarations only.
class GlslFixedFunction
|
||||
{
|
||||
public:
|
||||
GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen);
|
||||
void codegen();
|
||||
// Returns an llvm::Type* for the given LLVM context.
static llvm::Type *get_sampler_struct(llvm::LLVMContext &context);
|
||||
|
||||
private:
|
||||
// One private codegen_* routine per generated function.
void codegen_draw_triangles(int num_vertex_in, int num_vertex_out);
|
||||
void codegen_calc_window_positions();
|
||||
void codegen_calc_polygon_face_direction();
|
||||
void codegen_calc_polygon_y_range();
|
||||
void codegen_update_polygon_edge();
|
||||
void codegen_texture();
|
||||
void codegen_normalize();
|
||||
void codegen_reflect();
|
||||
void codegen_max();
|
||||
void codegen_pow();
|
||||
void codegen_dot();
|
||||
void codegen_mix();
|
||||
|
||||
// Bundle of values handed by reference to process_first_pixels,
// process_last_pixels and inner_block.
struct OuterData
|
||||
{
|
||||
// Value-initializes `sampler` (a raw llvm::Value pointer), i.e. sets it to null.
OuterData() : sampler() { }
|
||||
|
||||
SSAInt start;
|
||||
SSAInt end;
|
||||
SSAInt input_width;
|
||||
SSAInt input_height;
|
||||
SSAInt output_width;
|
||||
SSAInt output_height;
|
||||
SSAUBytePtr input_pixels;
|
||||
SSAUBytePtr output_pixels_line;
|
||||
|
||||
SSAVec4fPtr sse_left_varying_in;
|
||||
SSAVec4fPtr sse_right_varying_in;
|
||||
// Plain host-side int, unlike the SSA-typed members around it.
int num_varyings;
|
||||
SSAVec4f viewport_x;
|
||||
SSAVec4f viewport_rcp_half_width;
|
||||
SSAVec4f dx;
|
||||
SSAVec4f dw;
|
||||
SSAVec4f v1w;
|
||||
SSAVec4f v1x;
|
||||
|
||||
llvm::Value *sampler;
|
||||
};
|
||||
|
||||
// Takes input and output image dimensions and data pointers, a viewport
// rectangle, a vertex count, the fragment input pointers, and a core /
// num_cores pair (presumably for splitting work across threads - confirm).
void render_polygon(
|
||||
SSAInt input_width,
|
||||
SSAInt input_height,
|
||||
SSAUBytePtr input_data,
|
||||
SSAInt output_width,
|
||||
SSAInt output_height,
|
||||
SSAUBytePtr output_data,
|
||||
SSAInt viewport_x,
|
||||
SSAInt viewport_y,
|
||||
SSAInt viewport_width,
|
||||
SSAInt viewport_height,
|
||||
SSAInt num_vertices,
|
||||
std::vector<SSAVec4fPtr> fragment_ins,
|
||||
SSAInt core,
|
||||
SSAInt num_cores);
|
||||
|
||||
void codegen_render_scanline(int num_varyings);
|
||||
void process_first_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
|
||||
void process_last_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
|
||||
void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors);
|
||||
// Takes four fragment colors and a 16 x unsigned-byte destination vector by reference.
void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest);
|
||||
|
||||
// References captured from the constructor arguments; the referenced objects
// must outlive this instance.
GlslProgram &program;
|
||||
GlslCodeGen &vertex_codegen;
|
||||
GlslCodeGen &fragment_codegen;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
|
|
@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type()
|
|||
return llvm::Type::getInt16Ty(SSAScope::context());
|
||||
}
|
||||
|
||||
// Widens this 16-bit value to an SSAInt by emitting an LLVM ZExt instruction
// on `v` with SSAInt's LLVM type as the destination. ZExt fills the new high
// bits with zeros, so the short is treated as unsigned (no sign extension).
// Returns the result wrapped as an SSAInt.
SSAInt SSAShort::zext_int()
|
||||
{
|
||||
return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint()));
|
||||
}
|
||||
|
||||
SSAShort operator+(const SSAShort &a, const SSAShort &b)
|
||||
{
|
||||
return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
||||
|
|
|
@ -17,6 +17,8 @@ public:
|
|||
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
|
||||
static llvm::Type *llvm_type();
|
||||
|
||||
SSAInt zext_int();
|
||||
|
||||
llvm::Value *v;
|
||||
};
|
||||
|
||||
|
|
|
@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i)
|
|||
v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
|
||||
}
|
||||
|
||||
// Builds a 4-lane integer vector from four separate SSAInt values.
// i0..i3 are placed in lanes 0..3 respectively.
SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
|
||||
: v(0)
|
||||
{
|
||||
// Start from a constant vector of four 32-bit zeros so that every
// InsertElement below has a fully defined vector to modify.
std::vector<llvm::Constant*> constants;
|
||||
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
|
||||
v = llvm::ConstantVector::get(constants);
|
||||
// Each insert yields a new vector value, so `v` is reassigned after every lane.
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
|
||||
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
|
||||
}
|
||||
|
||||
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
||||
: v(0)
|
||||
{
|
||||
|
|
|
@ -16,6 +16,7 @@ public:
|
|||
SSAVec4i(int constant);
|
||||
SSAVec4i(int constant0, int constant1, int constant2, int constant3);
|
||||
SSAVec4i(SSAInt i);
|
||||
SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3);
|
||||
explicit SSAVec4i(llvm::Value *v);
|
||||
SSAVec4i(SSAVec4f f32);
|
||||
SSAInt operator[](SSAInt index);
|
||||
|
|
|
@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads()
|
|||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
class DrawSpanFFCommand : public DrawerCommand
|
||||
class DrawSpanLLVMCommand : public DrawerCommand
|
||||
{
|
||||
fixed_t _xfrac;
|
||||
fixed_t _yfrac;
|
||||
fixed_t _xstep;
|
||||
fixed_t _ystep;
|
||||
int _x1;
|
||||
int _x2;
|
||||
int _y;
|
||||
int _xbits;
|
||||
int _ybits;
|
||||
BYTE * RESTRICT _destorg;
|
||||
|
||||
const uint32_t * RESTRICT _source;
|
||||
uint32_t _light;
|
||||
ShadeConstants _shade_constants;
|
||||
bool _nearest_filter;
|
||||
|
||||
uint32_t _srcalpha;
|
||||
uint32_t _destalpha;
|
||||
|
||||
RenderArgs args;
|
||||
FixedFunction *_ff;
|
||||
|
||||
public:
|
||||
DrawSpanFFCommand()
|
||||
DrawSpanLLVMCommand()
|
||||
{
|
||||
_xfrac = ds_xfrac;
|
||||
_yfrac = ds_yfrac;
|
||||
_xstep = ds_xstep;
|
||||
_ystep = ds_ystep;
|
||||
_x1 = ds_x1;
|
||||
_x2 = ds_x2;
|
||||
_y = ds_y;
|
||||
_xbits = ds_xbits;
|
||||
_ybits = ds_ybits;
|
||||
_destorg = dc_destorg;
|
||||
|
||||
_source = (const uint32_t*)ds_source;
|
||||
_light = LightBgra::calc_light_multiplier(ds_light);
|
||||
_shade_constants = ds_shade_constants;
|
||||
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
|
||||
|
||||
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
|
||||
_destalpha = dc_destalpha >> (FRACBITS - 8);
|
||||
args.xfrac = ds_xfrac;
|
||||
args.yfrac = ds_yfrac;
|
||||
args.xstep = ds_xstep;
|
||||
args.ystep = ds_ystep;
|
||||
args.x1 = ds_x1;
|
||||
args.x2 = ds_x2;
|
||||
args.y = ds_y;
|
||||
args.xbits = ds_xbits;
|
||||
args.ybits = ds_ybits;
|
||||
args.destorg = (uint32_t*)dc_destorg;
|
||||
args.destpitch = dc_pitch;
|
||||
args.source = (const uint32_t*)ds_source;
|
||||
args.light = LightBgra::calc_light_multiplier(ds_light);
|
||||
args.light_red = ds_shade_constants.light_red;
|
||||
args.light_green = ds_shade_constants.light_green;
|
||||
args.light_blue = ds_shade_constants.light_blue;
|
||||
args.light_alpha = ds_shade_constants.light_alpha;
|
||||
args.fade_red = ds_shade_constants.fade_red;
|
||||
args.fade_green = ds_shade_constants.fade_green;
|
||||
args.fade_blue = ds_shade_constants.fade_blue;
|
||||
args.fade_alpha = ds_shade_constants.fade_alpha;
|
||||
args.desaturate = ds_shade_constants.desaturate;
|
||||
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
|
||||
args.destalpha = dc_destalpha >> (FRACBITS - 8);
|
||||
args.flags = 0;
|
||||
if (ds_shade_constants.simple_shade)
|
||||
args.flags |= RenderArgs::simple_shade;
|
||||
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
|
||||
args.flags |= RenderArgs::nearest_filter;
|
||||
|
||||
static FixedFunction ff;
|
||||
_ff = &ff;
|
||||
|
@ -351,25 +344,8 @@ public:
|
|||
|
||||
// Runs this queued span command on a drawer thread: returns early when
// thread->skipped_by_thread(...) reports the row as skipped, otherwise calls
// the DrawSpan function pointer on _ff with a pointer to `args`.
// NOTE(review): this listing appears to interleave the removed and added sides
// of the diff - there are two skipped_by_thread checks back to back, and a
// local `RenderArgs args` refilled from the `_`-prefixed fields that would
// shadow the `args` member. Confirm against the real file which lines are live.
void Execute(DrawerThread *thread) override
|
||||
{
|
||||
if (thread->skipped_by_thread(_y))
|
||||
if (thread->skipped_by_thread(args.y))
|
||||
return;
|
||||
|
||||
RenderArgs args;
|
||||
args.destorg = (uint32_t *)_destorg;
|
||||
args.source = _source;
|
||||
// Reads the global dc_pitch at execution time rather than a value captured
// at construction.
args.destpitch = dc_pitch;
|
||||
args.xfrac = _xfrac;
|
||||
args.yfrac = _yfrac;
|
||||
args.xstep = _xstep;
|
||||
args.ystep = _ystep;
|
||||
args.x1 = _x1;
|
||||
args.x2 = _x2;
|
||||
args.y = _y;
|
||||
args.xbits = _xbits;
|
||||
args.ybits = _ybits;
|
||||
args.light = _light;
|
||||
args.srcalpha = _srcalpha;
|
||||
args.destalpha = _destalpha;
|
||||
_ff->DrawSpan(&args);
|
||||
}
|
||||
};
|
||||
|
@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
|
|||
|
||||
void R_DrawSpan_rgba()
|
||||
{
|
||||
DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>();
|
||||
DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
|
||||
/*
|
||||
#ifdef NO_SSE
|
||||
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
||||
|
|
Loading…
Reference in a new issue