- add rect drawers for softpoly's player sprites

This commit is contained in:
Magnus Norddahl 2017-03-28 16:46:22 +02:00
parent 4716b712b1
commit e8ba1156af
9 changed files with 1015 additions and 490 deletions

View file

@ -199,3 +199,155 @@ void PolyDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint3
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha); SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha);
} }
} }
/////////////////////////////////////////////////////////////////////////////
void RectDrawArgs::SetTexture(const uint8_t *texels, int width, int height)
{
mTexturePixels = texels;
mTextureWidth = width;
mTextureHeight = height;
mTranslation = nullptr;
}
void RectDrawArgs::SetTexture(FTexture *texture)
{
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
mTexturePixels = (const uint8_t *)texture->GetPixelsBgra();
else
mTexturePixels = texture->GetPixels();
mTranslation = nullptr;
}
void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool forcePal)
{
if (translationID != 0xffffffff && translationID != 0)
{
FRemapTable *table = TranslationToTable(translationID);
if (table != nullptr && !table->Inactive)
{
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
mTranslation = (uint8_t*)table->Palette;
else
mTranslation = table->Remap;
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
mTexturePixels = texture->GetPixels();
return;
}
}
if (forcePal)
{
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
mTexturePixels = texture->GetPixels();
}
else
{
SetTexture(texture);
}
}
void RectDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel)
{
mLight = clamp<uint32_t>(lightlevel, 0, 255);
mLightRed = base_colormap->Color.r * 256 / 255;
mLightGreen = base_colormap->Color.g * 256 / 255;
mLightBlue = base_colormap->Color.b * 256 / 255;
mLightAlpha = base_colormap->Color.a * 256 / 255;
mFadeRed = base_colormap->Fade.r;
mFadeGreen = base_colormap->Fade.g;
mFadeBlue = base_colormap->Fade.b;
mFadeAlpha = base_colormap->Fade.a;
mDesaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256;
mSimpleShade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0);
mColormaps = base_colormap->Maps;
}
void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex)
{
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
{
mColor = bgra;
}
else
{
mColor = palindex;
}
}
void RectDrawArgs::Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1)
{
mX0 = (float)x0;
mX1 = (float)x1;
mY0 = (float)y0;
mY1 = (float)y1;
mU0 = (float)u0;
mU1 = (float)u1;
mV0 = (float)v0;
mV1 = (float)v1;
PolyRenderer::Instance()->DrawQueue->Push<DrawRectCommand>(*this);
}
void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright)
{
bool forcePal = (renderstyle == LegacyRenderStyles[STYLE_Shaded] || renderstyle == LegacyRenderStyles[STYLE_AddShaded]);
SetTexture(tex, translationID, forcePal);
if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 1.0, 0.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation())
{
SetStyle(TriBlendMode::TextureAddSrcColor, 1.0, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Add])
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Subtract])
{
SetStyle(Translation() ? TriBlendMode::TranslatedRevSub : TriBlendMode::TextureRevSub, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans])
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, transsouls, 1.0 - transsouls);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{ // NYI - Fuzzy - for now, just a copy of "Shadow"
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::Stencil, alpha, 1.0 - alpha);
}
else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::AddStencil, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Shaded])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::Shaded, alpha, 1.0 - alpha);
}
else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::AddShaded, alpha, 1.0);
}
else
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha);
}
}

View file

@ -149,3 +149,71 @@ private:
bool mNearestFilter = true; bool mNearestFilter = true;
bool mFixedLight = false; bool mFixedLight = false;
}; };
class RectDrawArgs
{
public:
void SetTexture(const uint8_t *texels, int width, int height);
void SetTexture(FTexture *texture);
void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false);
void SetLight(FSWColormap *basecolormap, uint32_t lightlevel);
void SetStyle(TriBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); }
void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright);
void SetColor(uint32_t bgra, uint8_t palindex);
void Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1);
const uint8_t *TexturePixels() const { return mTexturePixels; }
int TextureWidth() const { return mTextureWidth; }
int TextureHeight() const { return mTextureHeight; }
const uint8_t *Translation() const { return mTranslation; }
TriBlendMode BlendMode() const { return mBlendMode; }
uint32_t Color() const { return mColor; }
uint32_t SrcAlpha() const { return mSrcAlpha; }
uint32_t DestAlpha() const { return mDestAlpha; }
uint32_t Light() const { return mLight; }
const uint8_t *BaseColormap() const { return mColormaps; }
uint16_t ShadeLightAlpha() const { return mLightAlpha; }
uint16_t ShadeLightRed() const { return mLightRed; }
uint16_t ShadeLightGreen() const { return mLightGreen; }
uint16_t ShadeLightBlue() const { return mLightBlue; }
uint16_t ShadeFadeAlpha() const { return mFadeAlpha; }
uint16_t ShadeFadeRed() const { return mFadeRed; }
uint16_t ShadeFadeGreen() const { return mFadeGreen; }
uint16_t ShadeFadeBlue() const { return mFadeBlue; }
uint16_t ShadeDesaturate() const { return mDesaturate; }
bool SimpleShade() const { return mSimpleShade; }
float X0() const { return mX0; }
float X1() const { return mX1; }
float Y0() const { return mY0; }
float Y1() const { return mY1; }
float U0() const { return mU0; }
float U1() const { return mU1; }
float V0() const { return mV0; }
float V1() const { return mV1; }
private:
const uint8_t *mTexturePixels = nullptr;
int mTextureWidth = 0;
int mTextureHeight = 0;
const uint8_t *mTranslation = nullptr;
const uint8_t *mColormaps = nullptr;
TriBlendMode mBlendMode = TriBlendMode::FillOpaque;
uint32_t mLight = 0;
uint32_t mColor = 0;
uint32_t mSrcAlpha = 0;
uint32_t mDestAlpha = 0;
uint16_t mLightAlpha = 0;
uint16_t mLightRed = 0;
uint16_t mLightGreen = 0;
uint16_t mLightBlue = 0;
uint16_t mFadeAlpha = 0;
uint16_t mFadeRed = 0;
uint16_t mFadeGreen = 0;
uint16_t mFadeBlue = 0;
uint16_t mDesaturate = 0;
bool mSimpleShade = true;
float mX0, mX1, mY0, mY1, mU0, mU1, mV0, mV1;
};

View file

@ -24,6 +24,246 @@
#include "screen_triangle.h" #include "screen_triangle.h"
namespace TriScreenDrawerModes
{
template<typename SamplerT, typename FilterModeT>
FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
{
uint32_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texpal[texelX * texHeight + texelY]];
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
else
{
u -= oneU >> 1;
v -= oneV >> 1;
unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth;
unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth;
unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight;
unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight;
unsigned int x0 = frac_x0 >> FRACBITS;
unsigned int x1 = frac_x1 >> FRACBITS;
unsigned int y0 = frac_y0 >> FRACBITS;
unsigned int y1 = frac_y1 >> FRACBITS;
unsigned int p00 = texPixels[x0 * texHeight + y0];
unsigned int p01 = texPixels[x0 * texHeight + y1];
unsigned int p10 = texPixels[x1 * texHeight + y0];
unsigned int p11 = texPixels[x1 * texHeight + y1];
unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15;
unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15;
unsigned int a = 16 - inv_a;
unsigned int b = 16 - inv_b;
unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
uint32_t r = RPART(texel);
uint32_t g = GPART(texel);
uint32_t b = BPART(texel);
uint32_t fg_a = APART(texel);
uint32_t bg_red = RPART(color);
uint32_t bg_green = GPART(color);
uint32_t bg_blue = BPART(color);
r = (r * a + bg_red * inv_a + 127) >> 8;
g = (g * a + bg_green * inv_a + 127) >> 8;
b = (b * a + bg_blue * inv_a + 127) >> 8;
return MAKEARGB(fg_a, r, g, b);
}
else
{
return texel;
}
}
template<typename SamplerT>
FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight)
{
if (SamplerT::Mode == (int)Samplers::Shaded)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texpal[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]);
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else
{
return 0;
}
}
template<typename ShadeModeT>
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
{
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
{
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
}
else
{
int blue0 = BPART(ifgcolor0);
int green0 = GPART(ifgcolor0);
int red0 = RPART(ifgcolor0);
int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate;
int blue1 = BPART(ifgcolor1);
int green1 = GPART(ifgcolor1);
int red1 = RPART(ifgcolor1);
int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate;
__m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0);
fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
fgcolor = _mm_mullo_epi16(fgcolor, mlight);
fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
}
return fgcolor;
}
template<typename BlendT>
FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
{
if (BlendT::Mode == (int)BlendModes::Opaque)
{
__m128i outcolor = fgcolor;
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
__m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128());
mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
__m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
__m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7)));
__m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
fgcolor = _mm_mullo_epi16(fgcolor, alpha);
bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8);
__m128i outcolor = _mm_add_epi16(fgcolor, bgcolor);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else
{
uint32_t alpha0 = APART(ifgcolor0);
uint32_t alpha1 = APART(ifgcolor1);
alpha0 += alpha0 >> 7; // 255->256
alpha1 += alpha1 >> 7; // 255->256
uint32_t inv_alpha0 = 256 - alpha0;
uint32_t inv_alpha1 = 256 - alpha1;
uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8;
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8;
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8;
uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8;
__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0);
__m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0);
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
__m128i out_lo, out_hi;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
out_lo = _mm_add_epi32(fg_lo, bg_lo);
out_hi = _mm_add_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
out_lo = _mm_sub_epi32(fg_lo, bg_lo);
out_hi = _mm_sub_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
out_lo = _mm_sub_epi32(bg_lo, fg_lo);
out_hi = _mm_sub_epi32(bg_hi, fg_hi);
}
out_lo = _mm_srai_epi32(out_lo, 8);
out_hi = _mm_srai_epi32(out_hi, 8);
__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
}
}
template<typename BlendT, typename SamplerT> template<typename BlendT, typename SamplerT>
class TriScreenDrawer32 class TriScreenDrawer32
{ {
@ -66,6 +306,7 @@ public:
} }
} }
private:
template<typename ShadeModeT, typename FilterModeT> template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread) FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
{ {
@ -198,13 +439,13 @@ public:
// Sample fgcolor // Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2]; unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
@ -228,8 +469,8 @@ public:
// Shade and blend // Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); __m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result // Store result
_mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor); _mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor);
@ -298,13 +539,13 @@ public:
// Sample fgcolor // Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2]; unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
@ -328,8 +569,8 @@ public:
// Shade and blend // Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); __m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result // Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor); _mm_storel_epi64((__m128i*)desttmp, outcolor);
@ -387,13 +628,13 @@ public:
// Sample fgcolor // Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2]; unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation); ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight); ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
@ -417,8 +658,8 @@ public:
// Shade and blend // Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light); fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha); __m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result // Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor); _mm_storel_epi64((__m128i*)desttmp, outcolor);
@ -437,250 +678,6 @@ public:
} }
} }
private:
template<typename FilterModeT>
FORCEINLINE static unsigned int VECTORCALL Sample(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
{
using namespace TriScreenDrawerModes;
uint32_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texpal[texelX * texHeight + texelY]];
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
else
{
u -= oneU >> 1;
v -= oneV >> 1;
unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth;
unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth;
unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight;
unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight;
unsigned int x0 = frac_x0 >> FRACBITS;
unsigned int x1 = frac_x1 >> FRACBITS;
unsigned int y0 = frac_y0 >> FRACBITS;
unsigned int y1 = frac_y1 >> FRACBITS;
unsigned int p00 = texPixels[x0 * texHeight + y0];
unsigned int p01 = texPixels[x0 * texHeight + y1];
unsigned int p10 = texPixels[x1 * texHeight + y0];
unsigned int p11 = texPixels[x1 * texHeight + y1];
unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15;
unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15;
unsigned int a = 16 - inv_a;
unsigned int b = 16 - inv_b;
unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
uint32_t r = RPART(texel);
uint32_t g = GPART(texel);
uint32_t b = BPART(texel);
uint32_t fg_a = APART(texel);
uint32_t bg_red = RPART(color);
uint32_t bg_green = GPART(color);
uint32_t bg_blue = BPART(color);
r = (r * a + bg_red * inv_a + 127) >> 8;
g = (g * a + bg_green * inv_a + 127) >> 8;
b = (b * a + bg_blue * inv_a + 127) >> 8;
return MAKEARGB(fg_a, r, g, b);
}
else
{
return texel;
}
}
FORCEINLINE static unsigned int VECTORCALL SampleShade(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight)
{
using namespace TriScreenDrawerModes;
if (SamplerT::Mode == (int)Samplers::Shaded)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texpal[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]);
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else
{
return 0;
}
}
template<typename ShadeModeT>
FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
{
using namespace TriScreenDrawerModes;
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
{
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
}
else
{
int blue0 = BPART(ifgcolor0);
int green0 = GPART(ifgcolor0);
int red0 = RPART(ifgcolor0);
int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate;
int blue1 = BPART(ifgcolor1);
int green1 = GPART(ifgcolor1);
int red1 = RPART(ifgcolor1);
int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate;
__m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0);
fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
fgcolor = _mm_mullo_epi16(fgcolor, mlight);
fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
}
return fgcolor;
}
FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
{
using namespace TriScreenDrawerModes;
if (BlendT::Mode == (int)BlendModes::Opaque)
{
__m128i outcolor = fgcolor;
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
__m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128());
mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
__m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
__m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7)));
__m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
fgcolor = _mm_mullo_epi16(fgcolor, alpha);
bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8);
__m128i outcolor = _mm_add_epi16(fgcolor, bgcolor);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else
{
uint32_t alpha0 = APART(ifgcolor0);
uint32_t alpha1 = APART(ifgcolor1);
alpha0 += alpha0 >> 7; // 255->256
alpha1 += alpha1 >> 7; // 255->256
uint32_t inv_alpha0 = 256 - alpha0;
uint32_t inv_alpha1 = 256 - alpha1;
uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8;
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8;
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8;
uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8;
__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0);
__m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0);
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
__m128i out_lo, out_hi;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
out_lo = _mm_add_epi32(fg_lo, bg_lo);
out_hi = _mm_add_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
out_lo = _mm_sub_epi32(fg_lo, bg_lo);
out_hi = _mm_sub_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
out_lo = _mm_sub_epi32(bg_lo, fg_lo);
out_hi = _mm_sub_epi32(bg_hi, fg_hi);
}
out_lo = _mm_srai_epi32(out_lo, 8);
out_hi = _mm_srai_epi32(out_hi, 8);
__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
}
static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
{ {
float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2);
@ -695,3 +692,168 @@ private:
return top / bottom; return top / bottom;
} }
}; };
template<typename BlendT, typename SamplerT>
class RectScreenDrawer32
{
public:
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
if (args->SimpleShade())
{
Loop<SimpleShade, NearestFilter>(destOrg, destWidth, destHeight, destPitch, args, thread);
}
else
{
Loop<AdvancedShade, NearestFilter>(destOrg, destWidth, destHeight, destPitch, args, thread);
}
}
private:
template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth);
int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth);
int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight);
int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight);
if (x1 <= x0 || y1 <= y0)
return;
uint32_t srcalpha = args->SrcAlpha();
uint32_t destalpha = args->DestAlpha();
// Setup step variables
float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0());
float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0());
uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000);
uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000);
uint32_t stepU = (int32_t)(fstepU * 0x1000000);
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
// Sampling stuff
uint32_t color = args->Color();
const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation();
const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels();
uint32_t texWidth = args->TextureWidth();
uint32_t texHeight = args->TextureHeight();
uint32_t oneU, oneV;
if (SamplerT::Mode != (int)Samplers::Fill)
{
oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1;
oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1;
}
else
{
oneU = 0;
oneV = 0;
}
// Shade constants
__m128i inv_desaturate, shade_fade, shade_light;
int desaturate;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate());
shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue());
shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue());
desaturate = args->ShadeDesaturate();
}
else
{
inv_desaturate = _mm_setzero_si128();
shade_fade = _mm_setzero_si128();
shade_light = _mm_setzero_si128();
desaturate = 0;
}
// Setup light
uint32_t lightpos = args->Light();
lightpos += lightpos >> 7; // 255 -> 256
__m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos);
__m128i shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
}
else
{
shade_fade_lit = _mm_setzero_si128();
}
int count = x1 - x0;
int sseCount = count / 2;
uint32_t posV = startV;
for (int y = y0; y < y1; y++, posV += stepV)
{
int coreBlock = y / 8;
if (coreBlock % thread->num_cores != thread->core)
continue;
uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0;
uint32_t posU = startU;
for (int i = 0; i < sseCount; i++)
{
// Load bgcolor
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128());
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)dest, outcolor);
dest += 2;
}
if (sseCount * 2 != count)
{
// Load bgcolor
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128());
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[1] = 0;
ifgshade[1] = 0;
posU += stepU;
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
*dest = _mm_cvtsi128_si32(outcolor);
}
}
}
};

View file

@ -24,6 +24,187 @@
#include "screen_triangle.h" #include "screen_triangle.h"
namespace TriScreenDrawerModes
{
template<typename SamplerT>
FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation)
{
uint8_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texPixels[texelX * texHeight + texelY]];
}
else
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
if (a == 256)
return texel;
uint32_t capcolor = GPalette.BaseColors[color].d;
uint32_t texelrgb = GPalette.BaseColors[texel].d;
uint32_t r = RPART(texelrgb);
uint32_t g = GPART(texelrgb);
uint32_t b = BPART(texelrgb);
uint32_t capcolor_red = RPART(capcolor);
uint32_t capcolor_green = GPART(capcolor);
uint32_t capcolor_blue = BPART(capcolor);
r = (r * a + capcolor_red * inv_a + 127) >> 8;
g = (g * a + capcolor_green * inv_a + 127) >> 8;
b = (b * a + capcolor_blue * inv_a + 127) >> 8;
return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
}
else
{
return texel;
}
}
template<typename SamplerT>
FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight)
{
if (SamplerT::Mode == (int)Samplers::Shaded)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0;
}
else
{
return 0;
}
}
template<typename BlendT>
FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
{
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
uint8_t shadedfg = colormaps[lightshade + fgcolor];
if (BlendT::Mode == (int)BlendModes::Opaque)
{
return shadedfg;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7));
int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7));
int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7));
fg_r = MIN<int32_t>(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255);
fg_g = MIN<int32_t>(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255);
fg_b = MIN<int32_t>(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
uint32_t inv_alpha = 256 - fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8;
fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8;
fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8;
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = MIN<int32_t>(bg_r + ((fg_r * alpha + 127) >> 8), 255);
fg_g = MIN<int32_t>(bg_g + ((fg_g * alpha + 127) >> 8), 255);
fg_b = MIN<int32_t>(bg_b + ((fg_b * alpha + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255);
fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255);
fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0);
}
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
}
}
template<typename BlendT, typename SamplerT> template<typename BlendT, typename SamplerT>
class TriScreenDrawer8 class TriScreenDrawer8
{ {
@ -123,9 +304,9 @@ public:
{ {
int lightshade = lightpos >> 8; int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x * 8 + ix]; uint8_t bgcolor = dest[x * 8 + ix];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x * 8 + ix] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); dest[x * 8 + ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU; posU += stepU;
posV += stepV; posV += stepV;
lightpos += lightstep; lightpos += lightstep;
@ -185,9 +366,9 @@ public:
{ {
int lightshade = lightpos >> 8; int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x]; uint8_t bgcolor = dest[x];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
} }
posU += stepU; posU += stepU;
@ -236,9 +417,9 @@ public:
{ {
int lightshade = lightpos >> 8; int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x]; uint8_t bgcolor = dest[x];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation); uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight); uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha); dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
} }
posU += stepU; posU += stepU;
@ -258,187 +439,6 @@ public:
} }
private: private:
FORCEINLINE static unsigned int Sample(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation)
{
using namespace TriScreenDrawerModes;
uint8_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texPixels[texelX * texHeight + texelY]];
}
else
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
if (a == 256)
return texel;
uint32_t capcolor = GPalette.BaseColors[color].d;
uint32_t texelrgb = GPalette.BaseColors[texel].d;
uint32_t r = RPART(texelrgb);
uint32_t g = GPART(texelrgb);
uint32_t b = BPART(texelrgb);
uint32_t capcolor_red = RPART(capcolor);
uint32_t capcolor_green = GPART(capcolor);
uint32_t capcolor_blue = BPART(capcolor);
r = (r * a + capcolor_red * inv_a + 127) >> 8;
g = (g * a + capcolor_green * inv_a + 127) >> 8;
b = (b * a + capcolor_blue * inv_a + 127) >> 8;
return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
}
else
{
return texel;
}
}
FORCEINLINE static unsigned int SampleShade(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight)
{
using namespace TriScreenDrawerModes;
if (SamplerT::Mode == (int)Samplers::Shaded)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0;
}
else
{
return 0;
}
}
FORCEINLINE static uint8_t ShadeAndBlend(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
{
using namespace TriScreenDrawerModes;
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
uint8_t shadedfg = colormaps[lightshade + fgcolor];
if (BlendT::Mode == (int)BlendModes::Opaque)
{
return shadedfg;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7));
int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7));
int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7));
fg_r = MIN<int32_t>(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255);
fg_g = MIN<int32_t>(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255);
fg_b = MIN<int32_t>(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
uint32_t inv_alpha = 256 - fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8;
fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8;
fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8;
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = MIN<int32_t>(bg_r + ((fg_r * alpha + 127) >> 8), 255);
fg_g = MIN<int32_t>(bg_g + ((fg_g * alpha + 127) >> 8), 255);
fg_b = MIN<int32_t>(bg_b + ((fg_b * alpha + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255);
fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255);
fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0);
}
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
}
static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2) static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
{ {
float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2); float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2);
@ -453,3 +453,68 @@ private:
return top / bottom; return top / bottom;
} }
}; };
template<typename BlendT, typename SamplerT>
class RectScreenDrawer8
{
public:
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth);
int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth);
int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight);
int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight);
if (x1 <= x0 || y1 <= y0)
return;
auto colormaps = args->BaseColormap();
uint32_t srcalpha = args->SrcAlpha();
uint32_t destalpha = args->DestAlpha();
// Setup step variables
float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0());
float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0());
uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000);
uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000);
uint32_t stepU = (int32_t)(fstepU * 0x1000000);
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
// Sampling stuff
uint32_t color = args->Color();
const uint8_t * RESTRICT translation = args->Translation();
const uint8_t * RESTRICT texPixels = args->TexturePixels();
uint32_t texWidth = args->TextureWidth();
uint32_t texHeight = args->TextureHeight();
// Setup light
uint32_t lightshade = args->Light();
lightshade += lightshade >> 7; // 255 -> 256
int count = x1 - x0;
uint32_t posV = startV;
for (int y = y0; y < y1; y++, posV += stepV)
{
int coreBlock = y / 8;
if (coreBlock % thread->num_cores != thread->core)
continue;
uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0;
uint32_t posU = startU;
for (int i = 0; i < count; i++)
{
uint8_t bgcolor = *dest;
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
*dest = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU;
dest++;
}
}
}
};

View file

@ -449,3 +449,23 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread)
PolyTriangleDrawer::draw_arrays(args, &thread_data); PolyTriangleDrawer::draw_arrays(args, &thread_data);
} }
/////////////////////////////////////////////////////////////////////////////
void DrawRectCommand::Execute(DrawerThread *thread)
{
WorkerThreadData thread_data;
thread_data.core = thread->core;
thread_data.num_cores = thread->num_cores;
auto renderTarget = PolyRenderer::Instance()->RenderTarget;
const void *destOrg = renderTarget->GetBuffer();
int destWidth = renderTarget->GetWidth();
int destHeight = renderTarget->GetHeight();
int destPitch = renderTarget->GetPitch();
int blendmode = (int)args.BlendMode();
if (renderTarget->IsBgra())
ScreenTriangle::RectDrawers32[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data);
else
ScreenTriangle::RectDrawers8[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data);
}

View file

@ -71,3 +71,15 @@ public:
private: private:
PolyDrawArgs args; PolyDrawArgs args;
}; };
class DrawRectCommand : public DrawerCommand
{
public:
DrawRectCommand(const RectDrawArgs &args) : args(args) { }
void Execute(DrawerThread *thread) override;
FString DebugInfo() override { return "DrawRect"; }
private:
RectDrawArgs args;
};

View file

@ -922,7 +922,7 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThrea
} }
} }
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers8 = void(*ScreenTriangle::TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{ {
&TriScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque &TriScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&TriScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked &TriScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
@ -950,11 +950,14 @@ std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTria
#ifdef NO_SSE #ifdef NO_SSE
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers32; void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{
nullptr
};
#else #else
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers32 = void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{ {
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque &TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&TriScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked &TriScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
@ -981,3 +984,66 @@ std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTria
}; };
#endif #endif
void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&RectScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
&RectScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
&RectScreenDrawer8<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
&RectScreenDrawer8<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute // Skycap
};
#ifdef NO_SSE
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
nullptr
};
#else
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute // Skycap
};
#endif

View file

@ -85,6 +85,8 @@ struct TriDrawTriangleArgs
const PolyDrawArgs *uniforms; const PolyDrawArgs *uniforms;
}; };
class RectDrawArgs;
enum class TriBlendMode enum class TriBlendMode
{ {
TextureOpaque, TextureOpaque,
@ -119,8 +121,10 @@ public:
static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread); static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDrawers8; static void(*TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *);
static std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDrawers32; static void(*TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *);
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
}; };
struct ScreenTriangleStepVariables struct ScreenTriangleStepVariables

View file

@ -576,52 +576,28 @@ fixed_t RenderPolyPlayerSprites::LightLevelToShade(int lightlevel, bool foggy)
void PolyNoAccelPlayerSprite::Render() void PolyNoAccelPlayerSprite::Render()
{ {
#if 0
if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f)) if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f))
{ // scaled to 0; can't see { // scaled to 0; can't see
return; return;
} }
SpriteDrawerArgs drawerargs; RectDrawArgs args;
drawerargs.SetLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS); args.SetStyle(RenderStyle, Alpha, FillColor, Translation, pic, false);
args.SetLight(Light.BaseColormap, 255 - (Light.ColormapNum << 3));
FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(Light.BaseColormap);
bool visible = drawerargs.SetStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS);
if (!visible)
return;
double spryscale = yscale;
bool sprflipvert = false;
fixed_t iscale = FLOAT2FIXED(1 / yscale);
double centerY = viewheight / 2; double centerY = viewheight / 2;
double y1, y2;
double sprtopscreen;
if (renderflags & RF_YFLIP) if (renderflags & RF_YFLIP)
{ {
sprflipvert = true; y1 = centerY + (texturemid - pic->GetHeight()) * (-yscale);
spryscale = -spryscale; y2 = y1 + pic->GetHeight() * (-yscale);
iscale = -iscale;
sprtopscreen = centerY + (texturemid - pic->GetHeight()) * spryscale;
} }
else else
{ {
sprflipvert = false; y1 = centerY - texturemid * yscale;
sprtopscreen = centerY - texturemid * spryscale; y2 = y1 + pic->GetHeight() * yscale;
} }
args.Draw(x1, x2, y1, y2, 0.0f, 1.0f, 0.0f, 1.0f);
// clip to screen bounds
short *mfloorclip = screenheightarray;
short *mceilingclip = zeroarray;
fixed_t frac = startfrac;
for (int x = x1; x < x2; x++)
{
drawerargs.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false);
frac += xiscale;
}
#endif
} }
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////