mirror of
https://github.com/ZDoom/gzdoom.git
synced 2024-11-13 16:07:45 +00:00
Fully implemented codegen for DrawSpan
This commit is contained in:
parent
576fed5afc
commit
3aea3a0bee
7 changed files with 337 additions and 1225 deletions
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include "r_compiler/ssa/ssa_value.h"
|
||||||
#include "r_compiler/ssa/ssa_vec4f.h"
|
#include "r_compiler/ssa/ssa_vec4f.h"
|
||||||
#include "r_compiler/ssa/ssa_vec4i.h"
|
#include "r_compiler/ssa/ssa_vec4i.h"
|
||||||
#include "r_compiler/ssa/ssa_vec8s.h"
|
#include "r_compiler/ssa/ssa_vec8s.h"
|
||||||
|
@ -84,16 +85,9 @@ public:
|
||||||
SSAInt desaturate;
|
SSAInt desaturate;
|
||||||
};
|
};
|
||||||
|
|
||||||
class FixedFunction
|
class DrawerCodegen
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
FixedFunction();
|
|
||||||
|
|
||||||
void(*DrawSpan)(const RenderArgs *) = nullptr;
|
|
||||||
|
|
||||||
private:
|
|
||||||
void CodegenDrawSpan();
|
|
||||||
|
|
||||||
// LightBgra
|
// LightBgra
|
||||||
SSAInt calc_light_multiplier(SSAInt light);
|
SSAInt calc_light_multiplier(SSAInt light);
|
||||||
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
|
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
|
||||||
|
@ -111,89 +105,57 @@ private:
|
||||||
// SampleBgra
|
// SampleBgra
|
||||||
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
|
||||||
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
|
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
|
||||||
|
};
|
||||||
|
|
||||||
|
// Code generator for the DrawSpan drawer. Derives from DrawerCodegen and
// exposes a single public entry point, Generate(), which receives the drawer
// arguments as an SSAValue.
// NOTE(review): only the declaration is visible here. The data members below
// share their names with the RenderArgs fields filled in by the span drawer
// command (destorg, source, destpitch, xstep, ystep, x1, x2, y, xbits, ybits,
// light, srcalpha, destalpha) - presumably they are loaded from `args` inside
// Generate(); confirm against the implementation.
class DrawSpanCodegen : public DrawerCodegen
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
// Entry point of the generator; `args` is the SSA value for the drawer arguments.
void Generate(SSAValue args);
|
||||||
|
|
||||||
|
private:
|
||||||
|
// Helpers parameterised by compile-time bools; each one takes one more flag
// than the previous (shade -> filter -> 64x64).
// NOTE(review): this looks like one loop variant per flag combination - confirm.
void LoopShade(bool isSimpleShade);
|
||||||
|
void LoopFilter(bool isSimpleShade, bool isNearestFilter);
|
||||||
|
// Returns an SSAInt that has the same type as the `start` parameter of Loop().
// NOTE(review): presumably the index at which the 4x loop stopped - confirm.
SSAInt Loop4x(bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||||
|
void Loop(SSAInt start, bool isSimpleShade, bool isNearestFilter, bool is64x64);
|
||||||
|
// Produces one SSAVec4i sample for the given x/y texture fractions.
SSAVec4i Sample(SSAInt xfrac, SSAInt yfrac, bool isNearestFilter, bool is64x64);
|
||||||
|
|
||||||
|
// SSA stack slots for the loop index and the running x/y fractions.
SSAStack<SSAInt> stack_index, stack_xfrac, stack_yfrac;
|
||||||
|
|
||||||
|
// Values named after the corresponding RenderArgs fields.
SSAUBytePtr destorg;
|
||||||
|
SSAUBytePtr source;
|
||||||
|
SSAInt destpitch;
|
||||||
|
SSAInt xstep;
|
||||||
|
SSAInt ystep;
|
||||||
|
SSAInt x1;
|
||||||
|
SSAInt x2;
|
||||||
|
SSAInt y;
|
||||||
|
SSAInt xbits;
|
||||||
|
SSAInt ybits;
|
||||||
|
SSAInt light;
|
||||||
|
SSAInt srcalpha;
|
||||||
|
SSAInt destalpha;
|
||||||
|
// NOTE(review): the remaining values have no same-named RenderArgs field in
// the visible code; presumably derived during Generate() - confirm.
SSAInt count;
|
||||||
|
SSAUBytePtr data;
|
||||||
|
SSAInt yshift;
|
||||||
|
SSAInt xshift;
|
||||||
|
SSAInt xmask;
|
||||||
|
// Runtime (SSA) booleans, as opposed to the compile-time bool parameters of
// the helpers above.
SSABool is_64x64;
|
||||||
|
SSABool is_simple_shade;
|
||||||
|
SSABool is_nearest_filter;
|
||||||
|
SSAShadeConstants shade_constants;
|
||||||
|
};
|
||||||
|
|
||||||
|
class FixedFunction
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
FixedFunction();
|
||||||
|
|
||||||
|
void(*DrawSpan)(const RenderArgs *) = nullptr;
|
||||||
|
|
||||||
|
private:
|
||||||
|
void CodegenDrawSpan();
|
||||||
|
|
||||||
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
|
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
|
||||||
|
|
||||||
RenderProgram mProgram;
|
RenderProgram mProgram;
|
||||||
};
|
};
|
||||||
|
|
||||||
#if 0
|
|
||||||
|
|
||||||
class GlslProgram;
|
|
||||||
class GlslCodeGen;
|
|
||||||
|
|
||||||
class GlslFixedFunction
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
GlslFixedFunction(GlslProgram &program, GlslCodeGen &vertex_codegen, GlslCodeGen &fragment_codegen);
|
|
||||||
void codegen();
|
|
||||||
static llvm::Type *get_sampler_struct(llvm::LLVMContext &context);
|
|
||||||
|
|
||||||
private:
|
|
||||||
void codegen_draw_triangles(int num_vertex_in, int num_vertex_out);
|
|
||||||
void codegen_calc_window_positions();
|
|
||||||
void codegen_calc_polygon_face_direction();
|
|
||||||
void codegen_calc_polygon_y_range();
|
|
||||||
void codegen_update_polygon_edge();
|
|
||||||
void codegen_texture();
|
|
||||||
void codegen_normalize();
|
|
||||||
void codegen_reflect();
|
|
||||||
void codegen_max();
|
|
||||||
void codegen_pow();
|
|
||||||
void codegen_dot();
|
|
||||||
void codegen_mix();
|
|
||||||
|
|
||||||
struct OuterData
|
|
||||||
{
|
|
||||||
OuterData() : sampler() { }
|
|
||||||
|
|
||||||
SSAInt start;
|
|
||||||
SSAInt end;
|
|
||||||
SSAInt input_width;
|
|
||||||
SSAInt input_height;
|
|
||||||
SSAInt output_width;
|
|
||||||
SSAInt output_height;
|
|
||||||
SSAUBytePtr input_pixels;
|
|
||||||
SSAUBytePtr output_pixels_line;
|
|
||||||
|
|
||||||
SSAVec4fPtr sse_left_varying_in;
|
|
||||||
SSAVec4fPtr sse_right_varying_in;
|
|
||||||
int num_varyings;
|
|
||||||
SSAVec4f viewport_x;
|
|
||||||
SSAVec4f viewport_rcp_half_width;
|
|
||||||
SSAVec4f dx;
|
|
||||||
SSAVec4f dw;
|
|
||||||
SSAVec4f v1w;
|
|
||||||
SSAVec4f v1x;
|
|
||||||
|
|
||||||
llvm::Value *sampler;
|
|
||||||
};
|
|
||||||
|
|
||||||
void render_polygon(
|
|
||||||
SSAInt input_width,
|
|
||||||
SSAInt input_height,
|
|
||||||
SSAUBytePtr input_data,
|
|
||||||
SSAInt output_width,
|
|
||||||
SSAInt output_height,
|
|
||||||
SSAUBytePtr output_data,
|
|
||||||
SSAInt viewport_x,
|
|
||||||
SSAInt viewport_y,
|
|
||||||
SSAInt viewport_width,
|
|
||||||
SSAInt viewport_height,
|
|
||||||
SSAInt num_vertices,
|
|
||||||
std::vector<SSAVec4fPtr> fragment_ins,
|
|
||||||
SSAInt core,
|
|
||||||
SSAInt num_cores);
|
|
||||||
|
|
||||||
void codegen_render_scanline(int num_varyings);
|
|
||||||
void process_first_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
|
|
||||||
void process_last_pixels(OuterData &outer_data, SSAStack<SSAInt> &stack_x, SSAStack<SSAVec4f> &stack_xnormalized);
|
|
||||||
void inner_block(OuterData &data, SSAVec4f xnormalized, SSAVec4f *out_frag_colors);
|
|
||||||
void blend(SSAVec4f frag_colors[4], SSAVec16ub &dest);
|
|
||||||
|
|
||||||
GlslProgram &program;
|
|
||||||
GlslCodeGen &vertex_codegen;
|
|
||||||
GlslCodeGen &fragment_codegen;
|
|
||||||
};
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
|
@ -32,6 +32,11 @@ llvm::Type *SSAShort::llvm_type()
|
||||||
return llvm::Type::getInt16Ty(SSAScope::context());
|
return llvm::Type::getInt16Ty(SSAScope::context());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns this value widened to an SSAInt by emitting a zero-extension
// (CreateZExt) in the current SSAScope builder.
SSAInt SSAShort::zext_int()
|
||||||
|
{
|
||||||
|
// Zero-extension fills the new high bits with zeros, so the short is treated as unsigned.
return SSAInt::from_llvm(SSAScope::builder().CreateZExt(v, SSAInt::llvm_type(), SSAScope::hint()));
|
||||||
|
}
|
||||||
|
|
||||||
SSAShort operator+(const SSAShort &a, const SSAShort &b)
|
SSAShort operator+(const SSAShort &a, const SSAShort &b)
|
||||||
{
|
{
|
||||||
return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
return SSAShort::from_llvm(SSAScope::builder().CreateAdd(a.v, b.v, SSAScope::hint()));
|
||||||
|
|
|
@ -17,6 +17,8 @@ public:
|
||||||
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
|
static SSAShort from_llvm(llvm::Value *v) { return SSAShort(v); }
|
||||||
static llvm::Type *llvm_type();
|
static llvm::Type *llvm_type();
|
||||||
|
|
||||||
|
SSAInt zext_int();
|
||||||
|
|
||||||
llvm::Value *v;
|
llvm::Value *v;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -49,6 +49,18 @@ SSAVec4i::SSAVec4i(SSAInt i)
|
||||||
v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
|
v = SSAScope::builder().CreateShuffleVector(SSAScope::builder().CreateBitCast(i.v, m1xi32type, SSAScope::hint()), llvm::UndefValue::get(m1xi32type), mask, SSAScope::hint());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Builds a four-lane integer vector from four separate SSAInt values:
// i0 goes to lane 0, i1 to lane 1, i2 to lane 2 and i3 to lane 3.
SSAVec4i::SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3)
|
||||||
|
: v(0)
|
||||||
|
{
|
||||||
|
// Start from a constant vector of four 32-bit zeros ...
std::vector<llvm::Constant*> constants;
|
||||||
|
constants.resize(4, llvm::ConstantInt::get(SSAScope::context(), llvm::APInt(32, 0, true)));
|
||||||
|
v = llvm::ConstantVector::get(constants);
|
||||||
|
// ... then overwrite every lane in turn; each insert yields a new vector
// value that replaces v.
v = SSAScope::builder().CreateInsertElement(v, i0.v, (uint64_t)0, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i1.v, (uint64_t)1, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i2.v, (uint64_t)2, SSAScope::hint());
|
||||||
|
v = SSAScope::builder().CreateInsertElement(v, i3.v, (uint64_t)3, SSAScope::hint());
|
||||||
|
}
|
||||||
|
|
||||||
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
SSAVec4i::SSAVec4i(SSAVec4f f32)
|
||||||
: v(0)
|
: v(0)
|
||||||
{
|
{
|
||||||
|
|
|
@ -16,6 +16,7 @@ public:
|
||||||
SSAVec4i(int constant);
|
SSAVec4i(int constant);
|
||||||
SSAVec4i(int constant0, int constant1, int constant2, int constant3);
|
SSAVec4i(int constant0, int constant1, int constant2, int constant3);
|
||||||
SSAVec4i(SSAInt i);
|
SSAVec4i(SSAInt i);
|
||||||
|
SSAVec4i(SSAInt i0, SSAInt i1, SSAInt i2, SSAInt i3);
|
||||||
explicit SSAVec4i(llvm::Value *v);
|
explicit SSAVec4i(llvm::Value *v);
|
||||||
SSAVec4i(SSAVec4f f32);
|
SSAVec4i(SSAVec4f f32);
|
||||||
SSAInt operator[](SSAInt index);
|
SSAInt operator[](SSAInt index);
|
||||||
|
|
|
@ -300,50 +300,43 @@ void DrawerCommandQueue::StopThreads()
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
class DrawSpanFFCommand : public DrawerCommand
|
class DrawSpanLLVMCommand : public DrawerCommand
|
||||||
{
|
{
|
||||||
fixed_t _xfrac;
|
RenderArgs args;
|
||||||
fixed_t _yfrac;
|
|
||||||
fixed_t _xstep;
|
|
||||||
fixed_t _ystep;
|
|
||||||
int _x1;
|
|
||||||
int _x2;
|
|
||||||
int _y;
|
|
||||||
int _xbits;
|
|
||||||
int _ybits;
|
|
||||||
BYTE * RESTRICT _destorg;
|
|
||||||
|
|
||||||
const uint32_t * RESTRICT _source;
|
|
||||||
uint32_t _light;
|
|
||||||
ShadeConstants _shade_constants;
|
|
||||||
bool _nearest_filter;
|
|
||||||
|
|
||||||
uint32_t _srcalpha;
|
|
||||||
uint32_t _destalpha;
|
|
||||||
|
|
||||||
FixedFunction *_ff;
|
FixedFunction *_ff;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
DrawSpanFFCommand()
|
DrawSpanLLVMCommand()
|
||||||
{
|
{
|
||||||
_xfrac = ds_xfrac;
|
args.xfrac = ds_xfrac;
|
||||||
_yfrac = ds_yfrac;
|
args.yfrac = ds_yfrac;
|
||||||
_xstep = ds_xstep;
|
args.xstep = ds_xstep;
|
||||||
_ystep = ds_ystep;
|
args.ystep = ds_ystep;
|
||||||
_x1 = ds_x1;
|
args.x1 = ds_x1;
|
||||||
_x2 = ds_x2;
|
args.x2 = ds_x2;
|
||||||
_y = ds_y;
|
args.y = ds_y;
|
||||||
_xbits = ds_xbits;
|
args.xbits = ds_xbits;
|
||||||
_ybits = ds_ybits;
|
args.ybits = ds_ybits;
|
||||||
_destorg = dc_destorg;
|
args.destorg = (uint32_t*)dc_destorg;
|
||||||
|
args.destpitch = dc_pitch;
|
||||||
_source = (const uint32_t*)ds_source;
|
args.source = (const uint32_t*)ds_source;
|
||||||
_light = LightBgra::calc_light_multiplier(ds_light);
|
args.light = LightBgra::calc_light_multiplier(ds_light);
|
||||||
_shade_constants = ds_shade_constants;
|
args.light_red = ds_shade_constants.light_red;
|
||||||
_nearest_filter = !SampleBgra::span_sampler_setup(_source, _xbits, _ybits, _xstep, _ystep, ds_source_mipmapped);
|
args.light_green = ds_shade_constants.light_green;
|
||||||
|
args.light_blue = ds_shade_constants.light_blue;
|
||||||
_srcalpha = dc_srcalpha >> (FRACBITS - 8);
|
args.light_alpha = ds_shade_constants.light_alpha;
|
||||||
_destalpha = dc_destalpha >> (FRACBITS - 8);
|
args.fade_red = ds_shade_constants.fade_red;
|
||||||
|
args.fade_green = ds_shade_constants.fade_green;
|
||||||
|
args.fade_blue = ds_shade_constants.fade_blue;
|
||||||
|
args.fade_alpha = ds_shade_constants.fade_alpha;
|
||||||
|
args.desaturate = ds_shade_constants.desaturate;
|
||||||
|
args.srcalpha = dc_srcalpha >> (FRACBITS - 8);
|
||||||
|
args.destalpha = dc_destalpha >> (FRACBITS - 8);
|
||||||
|
args.flags = 0;
|
||||||
|
if (ds_shade_constants.simple_shade)
|
||||||
|
args.flags |= RenderArgs::simple_shade;
|
||||||
|
if (!SampleBgra::span_sampler_setup(args.source, args.xbits, args.ybits, args.xstep, args.ystep, ds_source_mipmapped))
|
||||||
|
args.flags |= RenderArgs::nearest_filter;
|
||||||
|
|
||||||
static FixedFunction ff;
|
static FixedFunction ff;
|
||||||
_ff = &ff;
|
_ff = &ff;
|
||||||
|
@ -351,25 +344,8 @@ public:
|
||||||
|
|
||||||
void Execute(DrawerThread *thread) override
|
void Execute(DrawerThread *thread) override
|
||||||
{
|
{
|
||||||
if (thread->skipped_by_thread(_y))
|
if (thread->skipped_by_thread(args.y))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
RenderArgs args;
|
|
||||||
args.destorg = (uint32_t *)_destorg;
|
|
||||||
args.source = _source;
|
|
||||||
args.destpitch = dc_pitch;
|
|
||||||
args.xfrac = _xfrac;
|
|
||||||
args.yfrac = _yfrac;
|
|
||||||
args.xstep = _xstep;
|
|
||||||
args.ystep = _ystep;
|
|
||||||
args.x1 = _x1;
|
|
||||||
args.x2 = _x2;
|
|
||||||
args.y = _y;
|
|
||||||
args.xbits = _xbits;
|
|
||||||
args.ybits = _ybits;
|
|
||||||
args.light = _light;
|
|
||||||
args.srcalpha = _srcalpha;
|
|
||||||
args.destalpha = _destalpha;
|
|
||||||
_ff->DrawSpan(&args);
|
_ff->DrawSpan(&args);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -2777,7 +2753,7 @@ void R_DrawRevSubClampTranslatedColumn_rgba()
|
||||||
|
|
||||||
void R_DrawSpan_rgba()
|
void R_DrawSpan_rgba()
|
||||||
{
|
{
|
||||||
DrawerCommandQueue::QueueCommand<DrawSpanFFCommand>();
|
DrawerCommandQueue::QueueCommand<DrawSpanLLVMCommand>();
|
||||||
/*
|
/*
|
||||||
#ifdef NO_SSE
|
#ifdef NO_SSE
|
||||||
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
DrawerCommandQueue::QueueCommand<DrawSpanRGBACommand>();
|
||||||
|
|
Loading…
Reference in a new issue