- add rect drawers for softpoly's player sprites

This commit is contained in:
Magnus Norddahl 2017-03-28 16:46:22 +02:00
parent 4716b712b1
commit e8ba1156af
9 changed files with 1015 additions and 490 deletions

View File

@ -199,3 +199,155 @@ void PolyDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint3
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha);
}
}
/////////////////////////////////////////////////////////////////////////////
void RectDrawArgs::SetTexture(const uint8_t *texels, int width, int height)
{
mTexturePixels = texels;
mTextureWidth = width;
mTextureHeight = height;
mTranslation = nullptr;
}
void RectDrawArgs::SetTexture(FTexture *texture)
{
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
mTexturePixels = (const uint8_t *)texture->GetPixelsBgra();
else
mTexturePixels = texture->GetPixels();
mTranslation = nullptr;
}
void RectDrawArgs::SetTexture(FTexture *texture, uint32_t translationID, bool forcePal)
{
if (translationID != 0xffffffff && translationID != 0)
{
FRemapTable *table = TranslationToTable(translationID);
if (table != nullptr && !table->Inactive)
{
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
mTranslation = (uint8_t*)table->Palette;
else
mTranslation = table->Remap;
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
mTexturePixels = texture->GetPixels();
return;
}
}
if (forcePal)
{
mTextureWidth = texture->GetWidth();
mTextureHeight = texture->GetHeight();
mTexturePixels = texture->GetPixels();
}
else
{
SetTexture(texture);
}
}
void RectDrawArgs::SetLight(FSWColormap *base_colormap, uint32_t lightlevel)
{
mLight = clamp<uint32_t>(lightlevel, 0, 255);
mLightRed = base_colormap->Color.r * 256 / 255;
mLightGreen = base_colormap->Color.g * 256 / 255;
mLightBlue = base_colormap->Color.b * 256 / 255;
mLightAlpha = base_colormap->Color.a * 256 / 255;
mFadeRed = base_colormap->Fade.r;
mFadeGreen = base_colormap->Fade.g;
mFadeBlue = base_colormap->Fade.b;
mFadeAlpha = base_colormap->Fade.a;
mDesaturate = MIN(abs(base_colormap->Desaturate), 255) * 255 / 256;
mSimpleShade = (base_colormap->Color.d == 0x00ffffff && base_colormap->Fade.d == 0x00000000 && base_colormap->Desaturate == 0);
mColormaps = base_colormap->Maps;
}
void RectDrawArgs::SetColor(uint32_t bgra, uint8_t palindex)
{
if (PolyRenderer::Instance()->RenderTarget->IsBgra())
{
mColor = bgra;
}
else
{
mColor = palindex;
}
}
void RectDrawArgs::Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1)
{
mX0 = (float)x0;
mX1 = (float)x1;
mY0 = (float)y0;
mY1 = (float)y1;
mU0 = (float)u0;
mU1 = (float)u1;
mV0 = (float)v0;
mV1 = (float)v1;
PolyRenderer::Instance()->DrawQueue->Push<DrawRectCommand>(*this);
}
void RectDrawArgs::SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *tex, bool fullbright)
{
bool forcePal = (renderstyle == LegacyRenderStyles[STYLE_Shaded] || renderstyle == LegacyRenderStyles[STYLE_AddShaded]);
SetTexture(tex, translationID, forcePal);
if (renderstyle == LegacyRenderStyles[STYLE_Normal] || (r_drawfuzz == 0 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 1.0, 0.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Add] && fullbright && alpha == 1.0 && !Translation())
{
SetStyle(TriBlendMode::TextureAddSrcColor, 1.0, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Add])
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Subtract])
{
SetStyle(Translation() ? TriBlendMode::TranslatedRevSub : TriBlendMode::TextureRevSub, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_SoulTrans])
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, transsouls, 1.0 - transsouls);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Fuzzy] || (r_drawfuzz == 2 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{ // NYI - Fuzzy - for now, just a copy of "Shadow"
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Shadow] || (r_drawfuzz == 1 && renderstyle == LegacyRenderStyles[STYLE_OptFuzzy]))
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, 0.0, 160 / 255.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_TranslucentStencil])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::Stencil, alpha, 1.0 - alpha);
}
else if (renderstyle == LegacyRenderStyles[STYLE_AddStencil])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::AddStencil, alpha, 1.0);
}
else if (renderstyle == LegacyRenderStyles[STYLE_Shaded])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::Shaded, alpha, 1.0 - alpha);
}
else if (renderstyle == LegacyRenderStyles[STYLE_AddShaded])
{
SetColor(0xff000000 | fillcolor, fillcolor >> 24);
SetStyle(TriBlendMode::AddShaded, alpha, 1.0);
}
else
{
SetStyle(Translation() ? TriBlendMode::TranslatedAdd : TriBlendMode::TextureAdd, alpha, 1.0 - alpha);
}
}

View File

@ -149,3 +149,71 @@ private:
bool mNearestFilter = true;
bool mFixedLight = false;
};
class RectDrawArgs
{
public:
void SetTexture(const uint8_t *texels, int width, int height);
void SetTexture(FTexture *texture);
void SetTexture(FTexture *texture, uint32_t translationID, bool forcePal = false);
void SetLight(FSWColormap *basecolormap, uint32_t lightlevel);
void SetStyle(TriBlendMode blendmode, double srcalpha = 1.0, double destalpha = 1.0) { mBlendMode = blendmode; mSrcAlpha = (uint32_t)(srcalpha * 256.0 + 0.5); mDestAlpha = (uint32_t)(destalpha * 256.0 + 0.5); }
void SetStyle(const FRenderStyle &renderstyle, double alpha, uint32_t fillcolor, uint32_t translationID, FTexture *texture, bool fullbright);
void SetColor(uint32_t bgra, uint8_t palindex);
void Draw(double x0, double x1, double y0, double y1, double u0, double u1, double v0, double v1);
const uint8_t *TexturePixels() const { return mTexturePixels; }
int TextureWidth() const { return mTextureWidth; }
int TextureHeight() const { return mTextureHeight; }
const uint8_t *Translation() const { return mTranslation; }
TriBlendMode BlendMode() const { return mBlendMode; }
uint32_t Color() const { return mColor; }
uint32_t SrcAlpha() const { return mSrcAlpha; }
uint32_t DestAlpha() const { return mDestAlpha; }
uint32_t Light() const { return mLight; }
const uint8_t *BaseColormap() const { return mColormaps; }
uint16_t ShadeLightAlpha() const { return mLightAlpha; }
uint16_t ShadeLightRed() const { return mLightRed; }
uint16_t ShadeLightGreen() const { return mLightGreen; }
uint16_t ShadeLightBlue() const { return mLightBlue; }
uint16_t ShadeFadeAlpha() const { return mFadeAlpha; }
uint16_t ShadeFadeRed() const { return mFadeRed; }
uint16_t ShadeFadeGreen() const { return mFadeGreen; }
uint16_t ShadeFadeBlue() const { return mFadeBlue; }
uint16_t ShadeDesaturate() const { return mDesaturate; }
bool SimpleShade() const { return mSimpleShade; }
float X0() const { return mX0; }
float X1() const { return mX1; }
float Y0() const { return mY0; }
float Y1() const { return mY1; }
float U0() const { return mU0; }
float U1() const { return mU1; }
float V0() const { return mV0; }
float V1() const { return mV1; }
private:
const uint8_t *mTexturePixels = nullptr;
int mTextureWidth = 0;
int mTextureHeight = 0;
const uint8_t *mTranslation = nullptr;
const uint8_t *mColormaps = nullptr;
TriBlendMode mBlendMode = TriBlendMode::FillOpaque;
uint32_t mLight = 0;
uint32_t mColor = 0;
uint32_t mSrcAlpha = 0;
uint32_t mDestAlpha = 0;
uint16_t mLightAlpha = 0;
uint16_t mLightRed = 0;
uint16_t mLightGreen = 0;
uint16_t mLightBlue = 0;
uint16_t mFadeAlpha = 0;
uint16_t mFadeRed = 0;
uint16_t mFadeGreen = 0;
uint16_t mFadeBlue = 0;
uint16_t mDesaturate = 0;
bool mSimpleShade = true;
float mX0, mX1, mY0, mY1, mU0, mU1, mV0, mV1;
};

View File

@ -24,6 +24,246 @@
#include "screen_triangle.h"
namespace TriScreenDrawerModes
{
template<typename SamplerT, typename FilterModeT>
FORCEINLINE unsigned int VECTORCALL Sample32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
{
uint32_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texpal[texelX * texHeight + texelY]];
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
else
{
u -= oneU >> 1;
v -= oneV >> 1;
unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth;
unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth;
unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight;
unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight;
unsigned int x0 = frac_x0 >> FRACBITS;
unsigned int x1 = frac_x1 >> FRACBITS;
unsigned int y0 = frac_y0 >> FRACBITS;
unsigned int y1 = frac_y1 >> FRACBITS;
unsigned int p00 = texPixels[x0 * texHeight + y0];
unsigned int p01 = texPixels[x0 * texHeight + y1];
unsigned int p10 = texPixels[x1 * texHeight + y0];
unsigned int p11 = texPixels[x1 * texHeight + y1];
unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15;
unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15;
unsigned int a = 16 - inv_a;
unsigned int b = 16 - inv_b;
unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
uint32_t r = RPART(texel);
uint32_t g = GPART(texel);
uint32_t b = BPART(texel);
uint32_t fg_a = APART(texel);
uint32_t bg_red = RPART(color);
uint32_t bg_green = GPART(color);
uint32_t bg_blue = BPART(color);
r = (r * a + bg_red * inv_a + 127) >> 8;
g = (g * a + bg_green * inv_a + 127) >> 8;
b = (b * a + bg_blue * inv_a + 127) >> 8;
return MAKEARGB(fg_a, r, g, b);
}
else
{
return texel;
}
}
template<typename SamplerT>
FORCEINLINE unsigned int VECTORCALL SampleShade32(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight)
{
if (SamplerT::Mode == (int)Samplers::Shaded)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texpal[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]);
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else
{
return 0;
}
}
template<typename ShadeModeT>
FORCEINLINE __m128i VECTORCALL Shade32(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
{
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
{
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
}
else
{
int blue0 = BPART(ifgcolor0);
int green0 = GPART(ifgcolor0);
int red0 = RPART(ifgcolor0);
int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate;
int blue1 = BPART(ifgcolor1);
int green1 = GPART(ifgcolor1);
int red1 = RPART(ifgcolor1);
int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate;
__m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0);
fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
fgcolor = _mm_mullo_epi16(fgcolor, mlight);
fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
}
return fgcolor;
}
template<typename BlendT>
FORCEINLINE __m128i VECTORCALL Blend32(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
{
if (BlendT::Mode == (int)BlendModes::Opaque)
{
__m128i outcolor = fgcolor;
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
__m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128());
mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
__m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
__m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7)));
__m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
fgcolor = _mm_mullo_epi16(fgcolor, alpha);
bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8);
__m128i outcolor = _mm_add_epi16(fgcolor, bgcolor);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else
{
uint32_t alpha0 = APART(ifgcolor0);
uint32_t alpha1 = APART(ifgcolor1);
alpha0 += alpha0 >> 7; // 255->256
alpha1 += alpha1 >> 7; // 255->256
uint32_t inv_alpha0 = 256 - alpha0;
uint32_t inv_alpha1 = 256 - alpha1;
uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8;
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8;
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8;
uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8;
__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0);
__m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0);
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
__m128i out_lo, out_hi;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
out_lo = _mm_add_epi32(fg_lo, bg_lo);
out_hi = _mm_add_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
out_lo = _mm_sub_epi32(fg_lo, bg_lo);
out_hi = _mm_sub_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
out_lo = _mm_sub_epi32(bg_lo, fg_lo);
out_hi = _mm_sub_epi32(bg_hi, fg_hi);
}
out_lo = _mm_srai_epi32(out_lo, 8);
out_hi = _mm_srai_epi32(out_hi, 8);
__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
}
}
template<typename BlendT, typename SamplerT>
class TriScreenDrawer32
{
@ -66,6 +306,7 @@ public:
}
}
private:
template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL Loop(const TriDrawTriangleArgs *args, WorkerThreadData *thread)
{
@ -198,13 +439,13 @@ public:
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
@ -228,8 +469,8 @@ public:
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)(dest + x * 8 + ix * 2), outcolor);
@ -298,13 +539,13 @@ public:
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
@ -328,8 +569,8 @@ public:
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
@ -387,13 +628,13 @@ public:
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
ifgcolor[1] = Sample<FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
posV += stepV;
@ -417,8 +658,8 @@ public:
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)desttmp, outcolor);
@ -437,250 +678,6 @@ public:
}
}
private:
template<typename FilterModeT>
FORCEINLINE static unsigned int VECTORCALL Sample(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight, uint32_t oneU, uint32_t oneV, uint32_t color, const uint32_t *translation)
{
using namespace TriScreenDrawerModes;
uint32_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texpal[texelX * texHeight + texelY]];
}
else if (FilterModeT::Mode == (int)FilterModes::Nearest)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
else
{
u -= oneU >> 1;
v -= oneV >> 1;
unsigned int frac_x0 = (((uint32_t)u << 8) >> FRACBITS) * texWidth;
unsigned int frac_x1 = ((((uint32_t)u << 8) + oneU) >> FRACBITS) * texWidth;
unsigned int frac_y0 = (((uint32_t)v << 8) >> FRACBITS) * texHeight;
unsigned int frac_y1 = ((((uint32_t)v << 8) + oneV) >> FRACBITS) * texHeight;
unsigned int x0 = frac_x0 >> FRACBITS;
unsigned int x1 = frac_x1 >> FRACBITS;
unsigned int y0 = frac_y0 >> FRACBITS;
unsigned int y1 = frac_y1 >> FRACBITS;
unsigned int p00 = texPixels[x0 * texHeight + y0];
unsigned int p01 = texPixels[x0 * texHeight + y1];
unsigned int p10 = texPixels[x1 * texHeight + y0];
unsigned int p11 = texPixels[x1 * texHeight + y1];
unsigned int inv_a = (frac_x1 >> (FRACBITS - 4)) & 15;
unsigned int inv_b = (frac_y1 >> (FRACBITS - 4)) & 15;
unsigned int a = 16 - inv_a;
unsigned int b = 16 - inv_b;
unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sgreen = (GPART(p00) * (a * b) + GPART(p01) * (inv_a * b) + GPART(p10) * (a * inv_b) + GPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int sblue = (BPART(p00) * (a * b) + BPART(p01) * (inv_a * b) + BPART(p10) * (a * inv_b) + BPART(p11) * (inv_a * inv_b) + 127) >> 8;
unsigned int salpha = (APART(p00) * (a * b) + APART(p01) * (inv_a * b) + APART(p10) * (a * inv_b) + APART(p11) * (inv_a * inv_b) + 127) >> 8;
texel = (salpha << 24) | (sred << 16) | (sgreen << 8) | sblue;
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
uint32_t r = RPART(texel);
uint32_t g = GPART(texel);
uint32_t b = BPART(texel);
uint32_t fg_a = APART(texel);
uint32_t bg_red = RPART(color);
uint32_t bg_green = GPART(color);
uint32_t bg_blue = BPART(color);
r = (r * a + bg_red * inv_a + 127) >> 8;
g = (g * a + bg_green * inv_a + 127) >> 8;
b = (b * a + bg_blue * inv_a + 127) >> 8;
return MAKEARGB(fg_a, r, g, b);
}
else
{
return texel;
}
}
FORCEINLINE static unsigned int VECTORCALL SampleShade(int32_t u, int32_t v, const uint32_t *texPixels, int texWidth, int texHeight)
{
using namespace TriScreenDrawerModes;
if (SamplerT::Mode == (int)Samplers::Shaded)
{
const uint8_t *texpal = (const uint8_t *)texPixels;
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texpal[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = APART(texPixels[texelX * texHeight + texelY]);
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else
{
return 0;
}
}
template<typename ShadeModeT>
FORCEINLINE static __m128i VECTORCALL Shade(__m128i fgcolor, __m128i mlight, unsigned int ifgcolor0, unsigned int ifgcolor1, int desaturate, __m128i inv_desaturate, __m128i shade_fade, __m128i shade_light)
{
using namespace TriScreenDrawerModes;
if (ShadeModeT::Mode == (int)ShadeMode::Simple)
{
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8);
}
else
{
int blue0 = BPART(ifgcolor0);
int green0 = GPART(ifgcolor0);
int red0 = RPART(ifgcolor0);
int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate;
int blue1 = BPART(ifgcolor1);
int green1 = GPART(ifgcolor1);
int red1 = RPART(ifgcolor1);
int intensity1 = ((red1 * 77 + green1 * 143 + blue1 * 37) >> 8) * desaturate;
__m128i intensity = _mm_set_epi16(0, intensity1, intensity1, intensity1, 0, intensity0, intensity0, intensity0);
fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8);
fgcolor = _mm_mullo_epi16(fgcolor, mlight);
fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);
}
return fgcolor;
}
FORCEINLINE static __m128i VECTORCALL Blend(__m128i fgcolor, __m128i bgcolor, unsigned int ifgcolor0, unsigned int ifgcolor1, unsigned int ifgshade0, unsigned int ifgshade1, uint32_t srcalpha, uint32_t destalpha)
{
using namespace TriScreenDrawerModes;
if (BlendT::Mode == (int)BlendModes::Opaque)
{
__m128i outcolor = fgcolor;
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
__m128i mask = _mm_cmpeq_epi32(_mm_packus_epi16(fgcolor, _mm_setzero_si128()), _mm_setzero_si128());
mask = _mm_unpacklo_epi8(mask, _mm_setzero_si128());
__m128i outcolor = _mm_or_si128(_mm_and_si128(mask, bgcolor), _mm_andnot_si128(mask, fgcolor));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
__m128i inv_srccolor = _mm_sub_epi16(_mm_set1_epi16(256), _mm_add_epi16(fgcolor, _mm_srli_epi16(fgcolor, 7)));
__m128i outcolor = _mm_add_epi16(fgcolor, _mm_srli_epi16(_mm_mullo_epi16(bgcolor, inv_srccolor), 8));
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
__m128i inv_alpha = _mm_sub_epi16(_mm_set1_epi16(256), alpha);
fgcolor = _mm_mullo_epi16(fgcolor, alpha);
bgcolor = _mm_mullo_epi16(bgcolor, inv_alpha);
__m128i outcolor = _mm_srli_epi16(_mm_add_epi16(fgcolor, bgcolor), 8);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
ifgshade0 = (ifgshade0 * srcalpha + 128) >> 8;
ifgshade1 = (ifgshade1 * srcalpha + 128) >> 8;
__m128i alpha = _mm_set_epi16(ifgshade1, ifgshade1, ifgshade1, ifgshade1, ifgshade0, ifgshade0, ifgshade0, ifgshade0);
fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, alpha), 8);
__m128i outcolor = _mm_add_epi16(fgcolor, bgcolor);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
else
{
uint32_t alpha0 = APART(ifgcolor0);
uint32_t alpha1 = APART(ifgcolor1);
alpha0 += alpha0 >> 7; // 255->256
alpha1 += alpha1 >> 7; // 255->256
uint32_t inv_alpha0 = 256 - alpha0;
uint32_t inv_alpha1 = 256 - alpha1;
uint32_t bgalpha0 = (destalpha * alpha0 + (inv_alpha0 << 8) + 128) >> 8;
uint32_t bgalpha1 = (destalpha * alpha1 + (inv_alpha1 << 8) + 128) >> 8;
uint32_t fgalpha0 = (srcalpha * alpha0 + 128) >> 8;
uint32_t fgalpha1 = (srcalpha * alpha1 + 128) >> 8;
__m128i bgalpha = _mm_set_epi16(bgalpha1, bgalpha1, bgalpha1, bgalpha1, bgalpha0, bgalpha0, bgalpha0, bgalpha0);
__m128i fgalpha = _mm_set_epi16(fgalpha1, fgalpha1, fgalpha1, fgalpha1, fgalpha0, fgalpha0, fgalpha0, fgalpha0);
fgcolor = _mm_mullo_epi16(fgcolor, fgalpha);
bgcolor = _mm_mullo_epi16(bgcolor, bgalpha);
__m128i fg_lo = _mm_unpacklo_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_lo = _mm_unpacklo_epi16(bgcolor, _mm_setzero_si128());
__m128i fg_hi = _mm_unpackhi_epi16(fgcolor, _mm_setzero_si128());
__m128i bg_hi = _mm_unpackhi_epi16(bgcolor, _mm_setzero_si128());
__m128i out_lo, out_hi;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
out_lo = _mm_add_epi32(fg_lo, bg_lo);
out_hi = _mm_add_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
out_lo = _mm_sub_epi32(fg_lo, bg_lo);
out_hi = _mm_sub_epi32(fg_hi, bg_hi);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
out_lo = _mm_sub_epi32(bg_lo, fg_lo);
out_hi = _mm_sub_epi32(bg_hi, fg_hi);
}
out_lo = _mm_srai_epi32(out_lo, 8);
out_hi = _mm_srai_epi32(out_hi, 8);
__m128i outcolor = _mm_packs_epi32(out_lo, out_hi);
outcolor = _mm_packus_epi16(outcolor, _mm_setzero_si128());
outcolor = _mm_or_si128(outcolor, _mm_set1_epi32(0xff000000));
return outcolor;
}
}
static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
{
float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2);
@ -695,3 +692,168 @@ private:
return top / bottom;
}
};
template<typename BlendT, typename SamplerT>
class RectScreenDrawer32
{
public:
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
if (args->SimpleShade())
{
Loop<SimpleShade, NearestFilter>(destOrg, destWidth, destHeight, destPitch, args, thread);
}
else
{
Loop<AdvancedShade, NearestFilter>(destOrg, destWidth, destHeight, destPitch, args, thread);
}
}
private:
template<typename ShadeModeT, typename FilterModeT>
FORCEINLINE static void VECTORCALL Loop(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth);
int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth);
int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight);
int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight);
if (x1 <= x0 || y1 <= y0)
return;
uint32_t srcalpha = args->SrcAlpha();
uint32_t destalpha = args->DestAlpha();
// Setup step variables
float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0());
float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0());
uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000);
uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000);
uint32_t stepU = (int32_t)(fstepU * 0x1000000);
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
// Sampling stuff
uint32_t color = args->Color();
const uint32_t * RESTRICT translation = (const uint32_t *)args->Translation();
const uint32_t * RESTRICT texPixels = (const uint32_t *)args->TexturePixels();
uint32_t texWidth = args->TextureWidth();
uint32_t texHeight = args->TextureHeight();
uint32_t oneU, oneV;
if (SamplerT::Mode != (int)Samplers::Fill)
{
oneU = ((0x800000 + texWidth - 1) / texWidth) * 2 + 1;
oneV = ((0x800000 + texHeight - 1) / texHeight) * 2 + 1;
}
else
{
oneU = 0;
oneV = 0;
}
// Shade constants
__m128i inv_desaturate, shade_fade, shade_light;
int desaturate;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
inv_desaturate = _mm_setr_epi16(256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256, 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate(), 256 - args->ShadeDesaturate());
shade_fade = _mm_set_epi16(args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue(), args->ShadeFadeAlpha(), args->ShadeFadeRed(), args->ShadeFadeGreen(), args->ShadeFadeBlue());
shade_light = _mm_set_epi16(args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue(), args->ShadeLightAlpha(), args->ShadeLightRed(), args->ShadeLightGreen(), args->ShadeLightBlue());
desaturate = args->ShadeDesaturate();
}
else
{
inv_desaturate = _mm_setzero_si128();
shade_fade = _mm_setzero_si128();
shade_light = _mm_setzero_si128();
desaturate = 0;
}
// Setup light
uint32_t lightpos = args->Light();
lightpos += lightpos >> 7; // 255 -> 256
__m128i mlight = _mm_set_epi16(256, lightpos, lightpos, lightpos, 256, lightpos, lightpos, lightpos);
__m128i shade_fade_lit;
if (ShadeModeT::Mode == (int)ShadeMode::Advanced)
{
__m128i inv_light = _mm_sub_epi16(_mm_set_epi16(0, 256, 256, 256, 0, 256, 256, 256), mlight);
shade_fade_lit = _mm_mullo_epi16(shade_fade, inv_light);
}
else
{
shade_fade_lit = _mm_setzero_si128();
}
int count = x1 - x0;
int sseCount = count / 2;
uint32_t posV = startV;
for (int y = y0; y < y1; y++, posV += stepV)
{
int coreBlock = y / 8;
if (coreBlock % thread->num_cores != thread->core)
continue;
uint32_t *dest = ((uint32_t*)destOrg) + y * destPitch + x0;
uint32_t posU = startU;
for (int i = 0; i < sseCount; i++)
{
// Load bgcolor
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)dest), _mm_setzero_si128());
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
ifgcolor[1] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[1] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
posU += stepU;
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
_mm_storel_epi64((__m128i*)dest, outcolor);
dest += 2;
}
if (sseCount * 2 != count)
{
// Load bgcolor
__m128i bgcolor;
if (BlendT::Mode != (int)BlendModes::Opaque)
bgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(*dest), _mm_setzero_si128());
else
bgcolor = _mm_setzero_si128();
// Sample fgcolor
unsigned int ifgcolor[2], ifgshade[2];
ifgcolor[0] = Sample32<SamplerT, FilterModeT>(posU, posV, texPixels, texWidth, texHeight, oneU, oneV, color, translation);
ifgshade[0] = SampleShade32<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
ifgcolor[1] = 0;
ifgshade[1] = 0;
posU += stepU;
// Shade and blend
__m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128());
fgcolor = Shade32<ShadeModeT>(fgcolor, mlight, ifgcolor[0], ifgcolor[1], desaturate, inv_desaturate, shade_fade_lit, shade_light);
__m128i outcolor = Blend32<BlendT>(fgcolor, bgcolor, ifgcolor[0], ifgcolor[1], ifgshade[0], ifgshade[1], srcalpha, destalpha);
// Store result
*dest = _mm_cvtsi128_si32(outcolor);
}
}
}
};

View File

@ -24,6 +24,187 @@
#include "screen_triangle.h"
namespace TriScreenDrawerModes
{
template<typename SamplerT>
FORCEINLINE unsigned int Sample8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation)
{
uint8_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texPixels[texelX * texHeight + texelY]];
}
else
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
if (a == 256)
return texel;
uint32_t capcolor = GPalette.BaseColors[color].d;
uint32_t texelrgb = GPalette.BaseColors[texel].d;
uint32_t r = RPART(texelrgb);
uint32_t g = GPART(texelrgb);
uint32_t b = BPART(texelrgb);
uint32_t capcolor_red = RPART(capcolor);
uint32_t capcolor_green = GPART(capcolor);
uint32_t capcolor_blue = BPART(capcolor);
r = (r * a + capcolor_red * inv_a + 127) >> 8;
g = (g * a + capcolor_green * inv_a + 127) >> 8;
b = (b * a + capcolor_blue * inv_a + 127) >> 8;
return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
}
else
{
return texel;
}
}
template<typename SamplerT>
FORCEINLINE unsigned int SampleShade8(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight)
{
if (SamplerT::Mode == (int)Samplers::Shaded)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0;
}
else
{
return 0;
}
}
template<typename BlendT>
FORCEINLINE uint8_t ShadeAndBlend8(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
{
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
uint8_t shadedfg = colormaps[lightshade + fgcolor];
if (BlendT::Mode == (int)BlendModes::Opaque)
{
return shadedfg;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7));
int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7));
int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7));
fg_r = MIN<int32_t>(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255);
fg_g = MIN<int32_t>(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255);
fg_b = MIN<int32_t>(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
uint32_t inv_alpha = 256 - fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8;
fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8;
fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8;
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = MIN<int32_t>(bg_r + ((fg_r * alpha + 127) >> 8), 255);
fg_g = MIN<int32_t>(bg_g + ((fg_g * alpha + 127) >> 8), 255);
fg_b = MIN<int32_t>(bg_b + ((fg_b * alpha + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255);
fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255);
fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0);
}
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
}
}
template<typename BlendT, typename SamplerT>
class TriScreenDrawer8
{
@ -123,9 +304,9 @@ public:
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x * 8 + ix];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight);
dest[x * 8 + ix] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x * 8 + ix] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU;
posV += stepV;
lightpos += lightstep;
@ -185,9 +366,9 @@ public:
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
}
posU += stepU;
@ -236,9 +417,9 @@ public:
{
int lightshade = lightpos >> 8;
uint8_t bgcolor = dest[x];
uint8_t fgcolor = Sample(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
dest[x] = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
}
posU += stepU;
@ -258,187 +439,6 @@ public:
}
private:
FORCEINLINE static unsigned int Sample(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight, uint32_t color, const uint8_t *translation)
{
using namespace TriScreenDrawerModes;
uint8_t texel;
if (SamplerT::Mode == (int)Samplers::Shaded || SamplerT::Mode == (int)Samplers::Stencil || SamplerT::Mode == (int)Samplers::Fill)
{
return color;
}
else if (SamplerT::Mode == (int)Samplers::Translated)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return translation[texPixels[texelX * texHeight + texelY]];
}
else
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
texel = texPixels[texelX * texHeight + texelY];
}
if (SamplerT::Mode == (int)Samplers::Skycap)
{
int start_fade = 2; // How fast it should fade out
int alpha_top = clamp(v >> (16 - start_fade), 0, 256);
int alpha_bottom = clamp(((2 << 24) - v) >> (16 - start_fade), 0, 256);
int a = MIN(alpha_top, alpha_bottom);
int inv_a = 256 - a;
if (a == 256)
return texel;
uint32_t capcolor = GPalette.BaseColors[color].d;
uint32_t texelrgb = GPalette.BaseColors[texel].d;
uint32_t r = RPART(texelrgb);
uint32_t g = GPART(texelrgb);
uint32_t b = BPART(texelrgb);
uint32_t capcolor_red = RPART(capcolor);
uint32_t capcolor_green = GPART(capcolor);
uint32_t capcolor_blue = BPART(capcolor);
r = (r * a + capcolor_red * inv_a + 127) >> 8;
g = (g * a + capcolor_green * inv_a + 127) >> 8;
b = (b * a + capcolor_blue * inv_a + 127) >> 8;
return RGB256k.All[((r >> 2) << 12) | ((g >> 2) << 6) | (b >> 2)];
}
else
{
return texel;
}
}
FORCEINLINE static unsigned int SampleShade(int32_t u, int32_t v, const uint8_t *texPixels, int texWidth, int texHeight)
{
using namespace TriScreenDrawerModes;
if (SamplerT::Mode == (int)Samplers::Shaded)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
unsigned int sampleshadeout = texPixels[texelX * texHeight + texelY];
sampleshadeout += sampleshadeout >> 7; // 255 -> 256
return sampleshadeout;
}
else if (SamplerT::Mode == (int)Samplers::Stencil)
{
uint32_t texelX = ((((uint32_t)u << 8) >> 16) * texWidth) >> 16;
uint32_t texelY = ((((uint32_t)v << 8) >> 16) * texHeight) >> 16;
return texPixels[texelX * texHeight + texelY] != 0 ? 256 : 0;
}
else
{
return 0;
}
}
FORCEINLINE static uint8_t ShadeAndBlend(uint8_t fgcolor, uint8_t bgcolor, uint32_t fgshade, uint32_t lightshade, const uint8_t *colormaps, uint32_t srcalpha, uint32_t destalpha)
{
using namespace TriScreenDrawerModes;
lightshade = ((256 - lightshade) * NUMCOLORMAPS) & 0xffffff00;
uint8_t shadedfg = colormaps[lightshade + fgcolor];
if (BlendT::Mode == (int)BlendModes::Opaque)
{
return shadedfg;
}
else if (BlendT::Mode == (int)BlendModes::Masked)
{
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddSrcColorOneMinusSrcColor)
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
int32_t inv_fg_r = 256 - (fg_r + (fg_r >> 7));
int32_t inv_fg_g = 256 - (fg_g + (fg_g >> 7));
int32_t inv_fg_b = 256 - (fg_b + (fg_b >> 7));
fg_r = MIN<int32_t>(fg_r + ((bg_r * inv_fg_r + 127) >> 8), 255);
fg_g = MIN<int32_t>(fg_g + ((bg_g * inv_fg_g + 127) >> 8), 255);
fg_b = MIN<int32_t>(fg_b + ((bg_b * inv_fg_b + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::Shaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
uint32_t inv_alpha = 256 - fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = (fg_r * alpha + bg_r * inv_alpha + 127) >> 8;
fg_g = (fg_g * alpha + bg_g * inv_alpha + 127) >> 8;
fg_b = (fg_b * alpha + bg_b * inv_alpha + 127) >> 8;
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else if (BlendT::Mode == (int)BlendModes::AddClampShaded)
{
fgshade = (fgshade * srcalpha + 128) >> 8;
uint32_t alpha = fgshade;
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
fg_r = MIN<int32_t>(bg_r + ((fg_r * alpha + 127) >> 8), 255);
fg_g = MIN<int32_t>(bg_g + ((fg_g * alpha + 127) >> 8), 255);
fg_b = MIN<int32_t>(bg_b + ((fg_b * alpha + 127) >> 8), 255);
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (alpha != 0) ? shadedfg : bgcolor;
}
else
{
int32_t fg_r = GPalette.BaseColors[shadedfg].r;
int32_t fg_g = GPalette.BaseColors[shadedfg].g;
int32_t fg_b = GPalette.BaseColors[shadedfg].b;
int32_t bg_r = GPalette.BaseColors[bgcolor].r;
int32_t bg_g = GPalette.BaseColors[bgcolor].g;
int32_t bg_b = GPalette.BaseColors[bgcolor].b;
if (BlendT::Mode == (int)BlendModes::AddClamp)
{
fg_r = MIN(int32_t(fg_r * srcalpha + bg_r * destalpha + 127) >> 8, 255);
fg_g = MIN(int32_t(fg_g * srcalpha + bg_g * destalpha + 127) >> 8, 255);
fg_b = MIN(int32_t(fg_b * srcalpha + bg_b * destalpha + 127) >> 8, 255);
}
else if (BlendT::Mode == (int)BlendModes::SubClamp)
{
fg_r = MAX(int32_t(fg_r * srcalpha - bg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(fg_g * srcalpha - bg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(fg_b * srcalpha - bg_b * destalpha + 127) >> 8, 0);
}
else if (BlendT::Mode == (int)BlendModes::RevSubClamp)
{
fg_r = MAX(int32_t(bg_r * srcalpha - fg_r * destalpha + 127) >> 8, 0);
fg_g = MAX(int32_t(bg_g * srcalpha - fg_g * destalpha + 127) >> 8, 0);
fg_b = MAX(int32_t(bg_b * srcalpha - fg_b * destalpha + 127) >> 8, 0);
}
shadedfg = RGB256k.All[((fg_r >> 2) << 12) | ((fg_g >> 2) << 6) | (fg_b >> 2)];
return (fgcolor != 0) ? shadedfg : bgcolor;
}
}
static float FindGradientX(float x0, float y0, float x1, float y1, float x2, float y2, float c0, float c1, float c2)
{
float top = (c1 - c2) * (y0 - y2) - (c0 - c2) * (y1 - y2);
@ -453,3 +453,68 @@ private:
return top / bottom;
}
};
template<typename BlendT, typename SamplerT>
class RectScreenDrawer8
{
public:
static void Execute(const void *destOrg, int destWidth, int destHeight, int destPitch, const RectDrawArgs *args, WorkerThreadData *thread)
{
using namespace TriScreenDrawerModes;
int x0 = clamp((int)(args->X0() + 0.5f), 0, destWidth);
int x1 = clamp((int)(args->X1() + 0.5f), 0, destWidth);
int y0 = clamp((int)(args->Y0() + 0.5f), 0, destHeight);
int y1 = clamp((int)(args->Y1() + 0.5f), 0, destHeight);
if (x1 <= x0 || y1 <= y0)
return;
auto colormaps = args->BaseColormap();
uint32_t srcalpha = args->SrcAlpha();
uint32_t destalpha = args->DestAlpha();
// Setup step variables
float fstepU = (args->U1() - args->U0()) / (args->X1() - args->X0());
float fstepV = (args->V1() - args->V0()) / (args->Y1() - args->Y0());
uint32_t startU = (int32_t)((args->U0() + (x0 + 0.5f - args->X0()) * fstepU) * 0x1000000);
uint32_t startV = (int32_t)((args->V0() + (y0 + 0.5f - args->Y0()) * fstepV) * 0x1000000);
uint32_t stepU = (int32_t)(fstepU * 0x1000000);
uint32_t stepV = (int32_t)(fstepV * 0x1000000);
// Sampling stuff
uint32_t color = args->Color();
const uint8_t * RESTRICT translation = args->Translation();
const uint8_t * RESTRICT texPixels = args->TexturePixels();
uint32_t texWidth = args->TextureWidth();
uint32_t texHeight = args->TextureHeight();
// Setup light
uint32_t lightshade = args->Light();
lightshade += lightshade >> 7; // 255 -> 256
int count = x1 - x0;
uint32_t posV = startV;
for (int y = y0; y < y1; y++, posV += stepV)
{
int coreBlock = y / 8;
if (coreBlock % thread->num_cores != thread->core)
continue;
uint8_t *dest = ((uint8_t*)destOrg) + y * destPitch + x0;
uint32_t posU = startU;
for (int i = 0; i < count; i++)
{
uint8_t bgcolor = *dest;
uint8_t fgcolor = Sample8<SamplerT>(posU, posV, texPixels, texWidth, texHeight, color, translation);
uint32_t fgshade = SampleShade8<SamplerT>(posU, posV, texPixels, texWidth, texHeight);
*dest = ShadeAndBlend8<BlendT>(fgcolor, bgcolor, fgshade, lightshade, colormaps, srcalpha, destalpha);
posU += stepU;
dest++;
}
}
}
};

View File

@ -449,3 +449,23 @@ void DrawPolyTrianglesCommand::Execute(DrawerThread *thread)
PolyTriangleDrawer::draw_arrays(args, &thread_data);
}
/////////////////////////////////////////////////////////////////////////////
void DrawRectCommand::Execute(DrawerThread *thread)
{
WorkerThreadData thread_data;
thread_data.core = thread->core;
thread_data.num_cores = thread->num_cores;
auto renderTarget = PolyRenderer::Instance()->RenderTarget;
const void *destOrg = renderTarget->GetBuffer();
int destWidth = renderTarget->GetWidth();
int destHeight = renderTarget->GetHeight();
int destPitch = renderTarget->GetPitch();
int blendmode = (int)args.BlendMode();
if (renderTarget->IsBgra())
ScreenTriangle::RectDrawers32[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data);
else
ScreenTriangle::RectDrawers8[blendmode](destOrg, destWidth, destHeight, destPitch, &args, &thread_data);
}

View File

@ -71,3 +71,15 @@ public:
private:
PolyDrawArgs args;
};
class DrawRectCommand : public DrawerCommand
{
public:
DrawRectCommand(const RectDrawArgs &args) : args(args) { }
void Execute(DrawerThread *thread) override;
FString DebugInfo() override { return "DrawRect"; }
private:
RectDrawArgs args;
};

View File

@ -922,7 +922,7 @@ void ScreenTriangle::SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThrea
}
}
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers8 =
void(*ScreenTriangle::TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{
&TriScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&TriScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
@ -950,11 +950,14 @@ std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTria
#ifdef NO_SSE
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers32;
void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{
nullptr
};
#else
std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTriangle::TriDrawers32 =
void(*ScreenTriangle::TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *) =
{
&TriScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&TriScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
@ -981,3 +984,66 @@ std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> ScreenTria
};
#endif
void(*ScreenTriangle::RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&RectScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
&RectScreenDrawer8<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
&RectScreenDrawer8<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
&RectScreenDrawer8<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
&RectScreenDrawer8<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
&RectScreenDrawer8<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
&RectScreenDrawer8<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
&RectScreenDrawer8<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
&RectScreenDrawer8<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
&RectScreenDrawer8<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute // Skycap
};
#ifdef NO_SSE
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
nullptr
};
#else
void(*ScreenTriangle::RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *) =
{
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureOpaque
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureMasked
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TextureSampler>::Execute, // TextureAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedOpaque
&RectScreenDrawer32<TriScreenDrawerModes::MaskedBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedMasked
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::TranslatedSampler>::Execute, // TranslatedAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // Shaded
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::ShadedSampler>::Execute, // AddShaded
&RectScreenDrawer32<TriScreenDrawerModes::ShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // Stencil
&RectScreenDrawer32<TriScreenDrawerModes::AddClampShadedBlend, TriScreenDrawerModes::StencilSampler>::Execute, // AddStencil
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillOpaque
&RectScreenDrawer32<TriScreenDrawerModes::AddClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAdd
&RectScreenDrawer32<TriScreenDrawerModes::SubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillSub
&RectScreenDrawer32<TriScreenDrawerModes::RevSubClampBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillRevSub
&RectScreenDrawer32<TriScreenDrawerModes::AddSrcColorBlend, TriScreenDrawerModes::FillSampler>::Execute, // FillAddSrcColor
&RectScreenDrawer32<TriScreenDrawerModes::OpaqueBlend, TriScreenDrawerModes::SkycapSampler>::Execute // Skycap
};
#endif

View File

@ -85,6 +85,8 @@ struct TriDrawTriangleArgs
const PolyDrawArgs *uniforms;
};
class RectDrawArgs;
enum class TriBlendMode
{
TextureOpaque,
@ -119,8 +121,10 @@ public:
static void StencilWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static void SubsectorWrite(const TriDrawTriangleArgs *args, WorkerThreadData *thread);
static std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDrawers8;
static std::vector<void(*)(const TriDrawTriangleArgs *, WorkerThreadData *)> TriDrawers32;
static void(*TriDrawers8[])(const TriDrawTriangleArgs *, WorkerThreadData *);
static void(*TriDrawers32[])(const TriDrawTriangleArgs *, WorkerThreadData *);
static void(*RectDrawers8[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
static void(*RectDrawers32[])(const void *, int, int, int, const RectDrawArgs *, WorkerThreadData *);
};
struct ScreenTriangleStepVariables

View File

@ -576,52 +576,28 @@ fixed_t RenderPolyPlayerSprites::LightLevelToShade(int lightlevel, bool foggy)
void PolyNoAccelPlayerSprite::Render()
{
#if 0
if (xscale == 0 || fabs(yscale) < (1.0f / 32000.0f))
{ // scaled to 0; can't see
return;
}
SpriteDrawerArgs drawerargs;
drawerargs.SetLight(Light.BaseColormap, 0, Light.ColormapNum << FRACBITS);
FDynamicColormap *basecolormap = static_cast<FDynamicColormap*>(Light.BaseColormap);
bool visible = drawerargs.SetStyle(RenderStyle, Alpha, Translation, FillColor, basecolormap, Light.ColormapNum << FRACBITS);
if (!visible)
return;
double spryscale = yscale;
bool sprflipvert = false;
fixed_t iscale = FLOAT2FIXED(1 / yscale);
RectDrawArgs args;
args.SetStyle(RenderStyle, Alpha, FillColor, Translation, pic, false);
args.SetLight(Light.BaseColormap, 255 - (Light.ColormapNum << 3));
double centerY = viewheight / 2;
double sprtopscreen;
double y1, y2;
if (renderflags & RF_YFLIP)
{
sprflipvert = true;
spryscale = -spryscale;
iscale = -iscale;
sprtopscreen = centerY + (texturemid - pic->GetHeight()) * spryscale;
y1 = centerY + (texturemid - pic->GetHeight()) * (-yscale);
y2 = y1 + pic->GetHeight() * (-yscale);
}
else
{
sprflipvert = false;
sprtopscreen = centerY - texturemid * spryscale;
y1 = centerY - texturemid * yscale;
y2 = y1 + pic->GetHeight() * yscale;
}
// clip to screen bounds
short *mfloorclip = screenheightarray;
short *mceilingclip = zeroarray;
fixed_t frac = startfrac;
for (int x = x1; x < x2; x++)
{
drawerargs.DrawMaskedColumn(x, iscale, pic, frac, spryscale, sprtopscreen, sprflipvert, mfloorclip, mceilingclip, false);
frac += xiscale;
}
#endif
args.Draw(x1, x2, y1, y2, 0.0f, 1.0f, 0.0f, 1.0f);
}
/////////////////////////////////////////////////////////////////////////////