Add light, blend and sampler functions

This commit is contained in:
Magnus Norddahl 2016-09-28 05:18:16 +02:00
parent f9a7186550
commit 576fed5afc
4 changed files with 173 additions and 7 deletions

View file

@ -154,8 +154,7 @@ void FixedFunction::CodegenDrawSpan()
SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
//SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
//*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants);
colors[i] = source[spot64 * 4].load_vec4ub() * light / 256;
colors[i] = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light);
stack_xfrac.store(xfrac + xstep);
stack_yfrac.store(yfrac + ystep);
@ -181,9 +180,7 @@ void FixedFunction::CodegenDrawSpan()
SSAInt spot64 = ((xfrac >> (32 - 6 - 6))&(63 * 64)) + (yfrac >> (32 - 6));
//SSAInt spot = ((xfrac >> xshift) & xmask) + (yfrac >> yshift);
//*loop.dest = LightBgra::shade_bgra(_source[loop.spot64()], _light, _shade_constants);
SSAVec4i color = source[spot64 * 4].load_vec4ub();
color = color * light / 256;
SSAVec4i color = shade_bgra_simple(source[spot64 * 4].load_vec4ub(), light);
data[index * 4].store_vec4ub(color);
stack_index.store(index + 1);
@ -200,6 +197,122 @@ void FixedFunction::CodegenDrawSpan()
mProgram.functionPassManager()->run(*function.func);
}
// Turns a fixed-point light value into a shade multiplier: the light value is
// shifted right by (FRACBITS - 8) and the result is subtracted from 256.
SSAInt FixedFunction::calc_light_multiplier(SSAInt light)
{
	SSAInt scaled_light = light >> (FRACBITS - 8);
	return 256 - scaled_light;
}
// Loads the palette entry at byte offset index * 4 from basecolors and shades
// it with shade_bgra_simple.
SSAVec4i FixedFunction::shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors)
{
	// Equivalent of GPalette.BaseColors[index], followed by the simple shade.
	return shade_bgra_simple(basecolors[index * 4].load_vec4ub(), light);
}
// Loads the palette entry at byte offset index * 4 from basecolors and shades
// it with shade_bgra_advanced using the supplied shade constants.
SSAVec4i FixedFunction::shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors)
{
	// Equivalent of GPalette.BaseColors[index].
	SSAInt byte_offset = index * 4;
	SSAVec4i palette_color = basecolors[byte_offset].load_vec4ub();
	return shade_bgra_advanced(palette_color, light, constants);
}
// Scales every element of color by light / 256, then overwrites element 3
// (the alpha slot) with 255.
SSAVec4i FixedFunction::shade_bgra_simple(SSAVec4i color, SSAInt light)
{
	SSAVec4i shaded = color * light / 256;
	return shaded.insert(3, 255);
}
// Shades a BGRA color with desaturation, fade and a per-channel light color.
//
// Steps, each normalized by / 256:
//   1. mix the color with its weighted gray intensity (constants.desaturate),
//   2. blend towards constants.fade with weight (256 - light),
//   3. multiply by constants.light.
// The incoming alpha (element 3) is written back into the result unchanged.
SSAVec4i FixedFunction::shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants)
{
	// Element order is blue, green, red, alpha.
	SSAInt b = color[0];
	SSAInt g = color[1];
	SSAInt r = color[2];
	SSAInt a = color[3];

	// Gray level of the pixel, pre-multiplied by the desaturation amount.
	SSAInt gray = (r * 77 + g * 143 + b * 37) >> 8;
	SSAInt intensity = gray * constants.desaturate;

	SSAVec4i inv_light = 256 - light;
	SSAVec4i inv_desaturate = 256 - constants.desaturate;

	SSAVec4i shaded = (color * inv_desaturate + intensity) / 256;
	shaded = (constants.fade * inv_light + shaded * light) / 256;
	shaded = (shaded * constants.light) / 256;

	return shaded.insert(3, a);
}
// Copy blend: takes no background color and returns the foreground unchanged.
SSAVec4i FixedFunction::blend_copy(SSAVec4i fg)
{
	SSAVec4i result = fg;
	return result;
}
// Additive blend: (fg * srcalpha + bg * destalpha) / 256, with element 3
// (alpha) of the result forced to 255.
SSAVec4i FixedFunction::blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAVec4i weighted_fg = fg * srcalpha;
	SSAVec4i weighted_bg = bg * destalpha;
	SSAVec4i blended = (weighted_fg + weighted_bg) / 256;
	return blended.insert(3, 255);
}
// Subtractive blend: (bg * destalpha - fg * srcalpha) / 256, with element 3
// (alpha) of the result forced to 255. No clamping is applied here.
SSAVec4i FixedFunction::blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAVec4i weighted_bg = bg * destalpha;
	SSAVec4i weighted_fg = fg * srcalpha;
	SSAVec4i blended = (weighted_bg - weighted_fg) / 256;
	return blended.insert(3, 255);
}
// Reverse subtractive blend: (fg * srcalpha - bg * destalpha) / 256, with
// element 3 (alpha) of the result forced to 255. No clamping is applied here.
SSAVec4i FixedFunction::blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha)
{
	SSAVec4i weighted_fg = fg * srcalpha;
	SSAVec4i weighted_bg = bg * destalpha;
	SSAVec4i blended = (weighted_fg - weighted_bg) / 256;
	return blended.insert(3, 255);
}
// Blends fg over bg using the foreground's own alpha (element 3):
// (fg * alpha + bg * (256 - alpha)) / 256, with the result's alpha set to 255.
SSAVec4i FixedFunction::blend_alpha_blend(SSAVec4i fg, SSAVec4i bg)
{
	SSAInt raw_alpha = fg[3];

	// Adding the top bit stretches the range so that 255 becomes 256.
	SSAInt alpha = raw_alpha + (raw_alpha >> 7);
	SSAInt inv_alpha = 256 - alpha;

	SSAVec4i blended = (fg * alpha + bg * inv_alpha) / 256;
	return blended.insert(3, 255);
}
// Bilinear sample between two texture columns.
//
// col0 / col1     - byte pointers to the two columns being blended.
// texturefracx    - horizontal blend weight, used as-is as the weight of col1
//                   (presumably already in the range 0..15 - confirm against caller).
// texturefracy    - fixed-point vertical texture coordinate.
// one             - fixed-point step added to texturefracy to reach the next texel row.
// height          - column height in texels.
//
// Returns the weighted sum of the four neighbouring texels; the four weights
// add up to 256, so the sum is rounded (+127) and shifted down by 8.
SSAVec4i FixedFunction::sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height)
{
	SSAInt frac_y0 = (texturefracy >> FRACBITS) * height;
	SSAInt frac_y1 = ((texturefracy + one) >> FRACBITS) * height;
	SSAInt y0 = frac_y0 >> FRACBITS;
	SSAInt y1 = frac_y1 >> FRACBITS;

	// y0 / y1 are texel rows, but the pointers are indexed in bytes and a
	// texel is four bytes wide, so the row has to be scaled by 4 - matching
	// the other texel loads such as basecolors[index * 4].
	SSAVec4i p00 = col0[y0 * 4].load_vec4ub();
	SSAVec4i p01 = col0[y1 * 4].load_vec4ub();
	SSAVec4i p10 = col1[y0 * 4].load_vec4ub();
	SSAVec4i p11 = col1[y1 * 4].load_vec4ub();

	// 4-bit blend weights: inv_* weights the "next" texel, a/b the current one.
	SSAInt inv_b = texturefracx;
	SSAInt inv_a = (frac_y1 >> (FRACBITS - 4)) & 15;
	SSAInt a = 16 - inv_a;
	SSAInt b = 16 - inv_b;

	return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
// Bilinear sample from a texture addressed by fixed-point coordinates.
//
// texture         - byte pointer to the first texel.
// xfrac / yfrac   - fixed-point texture coordinates.
// xbits / ybits   - shift amounts applied to xfrac / yfrac to obtain texel
//                   coordinates; (32 - bits) is used as the mask width and
//                   (32 - ybits) as the column stride shift.
//                   NOTE(review): the weights shift by (bits - 4), so this
//                   presumably requires xbits, ybits >= 4 - confirm against caller.
//
// Returns the weighted sum of the four neighbouring texels; the four weights
// add up to 256, so the sum is rounded (+127) and shifted down by 8.
SSAVec4i FixedFunction::sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits)
{
	SSAInt xshift = (32 - xbits);
	SSAInt yshift = (32 - ybits);
	SSAInt xmask = (SSAInt(1) << xshift) - 1;
	SSAInt ymask = (SSAInt(1) << yshift) - 1;
	SSAInt x = xfrac >> xbits;
	SSAInt y = yfrac >> ybits;

	// Wrapped column offsets and rows for the current and the next texel.
	SSAInt col0 = (x & xmask) << yshift;
	SSAInt col1 = ((x + 1) & xmask) << yshift;
	SSAInt row0 = y & ymask;
	SSAInt row1 = (y + 1) & ymask;

	// The sums above are texel indices, but the pointer is indexed in bytes
	// and a texel is four bytes wide, so each index has to be scaled by 4 -
	// matching the other texel loads such as basecolors[index * 4].
	SSAVec4i p00 = texture[(row0 + col0) * 4].load_vec4ub();
	SSAVec4i p01 = texture[(row1 + col0) * 4].load_vec4ub();
	SSAVec4i p10 = texture[(row0 + col1) * 4].load_vec4ub();
	SSAVec4i p11 = texture[(row1 + col1) * 4].load_vec4ub();

	// 4-bit blend weights: inv_* weights the "next" texel, a/b the current one.
	SSAInt inv_b = (xfrac >> (xbits - 4)) & 15;
	SSAInt inv_a = (yfrac >> (ybits - 4)) & 15;
	SSAInt a = 16 - inv_a;
	SSAInt b = 16 - inv_b;

	return (p00 * (a * b) + p01 * (inv_a * b) + p10 * (a * inv_b) + p11 * (inv_a * inv_b) + 127) >> 8;
}
llvm::Type *FixedFunction::GetRenderArgsStruct(llvm::LLVMContext &context)
{
std::vector<llvm::Type *> elements;

View file

@ -6,6 +6,7 @@
#include "r_compiler/ssa/ssa_vec8s.h"
#include "r_compiler/ssa/ssa_vec16ub.h"
#include "r_compiler/ssa/ssa_int.h"
#include "r_compiler/ssa/ssa_int_ptr.h"
#include "r_compiler/ssa/ssa_short.h"
#include "r_compiler/ssa/ssa_ubyte_ptr.h"
#include "r_compiler/ssa/ssa_vec4f_ptr.h"
@ -57,8 +58,30 @@ struct RenderArgs
uint32_t light;
uint32_t srcalpha;
uint32_t destalpha;
//ShadeConstants _shade_constants;
//int32_t nearest_filter;
uint16_t light_alpha;
uint16_t light_red;
uint16_t light_green;
uint16_t light_blue;
uint16_t fade_alpha;
uint16_t fade_red;
uint16_t fade_green;
uint16_t fade_blue;
uint16_t desaturate;
uint32_t flags;
enum Flags
{
simple_shade = 1,
nearest_filter = 2
};
};
// Shade parameters in SSA form, read by FixedFunction::shade_bgra_advanced.
class SSAShadeConstants
{
public:
// Per-channel multiplier applied as the last shading step (color * light / 256).
SSAVec4i light;
// Color blended into the result with weight (256 - light value passed to the shader).
SSAVec4i fade;
// Weight of the gray intensity; the original color is weighted by (256 - desaturate).
SSAInt desaturate;
};
class FixedFunction
@ -71,6 +94,24 @@ public:
private:
void CodegenDrawSpan();
// LightBgra
SSAInt calc_light_multiplier(SSAInt light);
SSAVec4i shade_pal_index_simple(SSAInt index, SSAInt light, SSAUBytePtr basecolors);
SSAVec4i shade_pal_index_advanced(SSAInt index, SSAInt light, const SSAShadeConstants &constants, SSAUBytePtr basecolors);
SSAVec4i shade_bgra_simple(SSAVec4i color, SSAInt light);
SSAVec4i shade_bgra_advanced(SSAVec4i color, SSAInt light, const SSAShadeConstants &constants);
// BlendBgra
SSAVec4i blend_copy(SSAVec4i fg);
SSAVec4i blend_add(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_sub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_revsub(SSAVec4i fg, SSAVec4i bg, SSAInt srcalpha, SSAInt destalpha);
SSAVec4i blend_alpha_blend(SSAVec4i fg, SSAVec4i bg);
// SampleBgra
SSAVec4i sample_linear(SSAUBytePtr col0, SSAUBytePtr col1, SSAInt texturefracx, SSAInt texturefracy, SSAInt one, SSAInt height);
SSAVec4i sample_linear(SSAUBytePtr texture, SSAInt xfrac, SSAInt yfrac, SSAInt xbits, SSAInt ybits);
static llvm::Type *GetRenderArgsStruct(llvm::LLVMContext &context);
RenderProgram mProgram;

View file

@ -60,6 +60,16 @@ SSAInt SSAVec4i::operator[](SSAInt index)
return SSAInt::from_llvm(SSAScope::builder().CreateExtractElement(v, index.v, SSAScope::hint()));
}
// Emits an insertelement instruction and returns the resulting vector: a copy
// of this vector with the element at the (runtime) index replaced by value.
SSAVec4i SSAVec4i::insert(SSAInt index, SSAInt value)
{
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, index.v, SSAScope::hint());
	return SSAVec4i::from_llvm(updated);
}
// Emits an insertelement instruction and returns the resulting vector: a copy
// of this vector with the element at the compile-time constant index replaced
// by value.
SSAVec4i SSAVec4i::insert(int index, SSAInt value)
{
	llvm::Value *updated = SSAScope::builder().CreateInsertElement(v, value.v, index, SSAScope::hint());
	return SSAVec4i::from_llvm(updated);
}
llvm::Type *SSAVec4i::llvm_type()
{
return llvm::VectorType::get(llvm::Type::getInt32Ty(SSAScope::context()), 4);

View file

@ -19,6 +19,8 @@ public:
explicit SSAVec4i(llvm::Value *v);
SSAVec4i(SSAVec4f f32);
SSAInt operator[](SSAInt index);
SSAVec4i insert(SSAInt index, SSAInt value);
SSAVec4i insert(int index, SSAInt value);
static SSAVec4i bitcast(SSAVec4f f32);
static SSAVec4i bitcast(SSAVec8s i16);
static SSAVec4i shuffle(const SSAVec4i &f0, int index0, int index1, int index2, int index3);