mirror of
https://github.com/ZDoom/qzdoom.git
synced 2024-11-14 08:30:49 +00:00
Blend mode fixes
This commit is contained in:
parent
d3bc68a160
commit
6daeb5a158
4 changed files with 76 additions and 96 deletions
|
@ -2538,8 +2538,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = source[frac >> bits];
|
uint32_t pix = source[frac >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -2631,8 +2631,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -2709,8 +2709,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = source[frac >> bits];
|
uint32_t pix = source[frac >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -2802,8 +2802,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -2880,8 +2880,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = source[frac >> bits];
|
uint32_t pix = source[frac >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -2973,8 +2973,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -3051,8 +3051,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = source[frac >> bits];
|
uint32_t pix = source[frac >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
uint32_t fg = shade_bgra(pix, light, shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
@ -3144,8 +3144,8 @@ public:
|
||||||
{
|
{
|
||||||
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
uint32_t pix = bufplce[i][local_vplce[i] >> bits];
|
||||||
|
|
||||||
uint32_t fg_alpha, bg_alpha;
|
uint32_t fg_alpha = src_alpha;
|
||||||
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha);
|
uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
|
||||||
|
|
||||||
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
uint32_t fg = shade_bgra(pix, light[i], shade_constants);
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
|
|
|
@ -339,6 +339,7 @@ FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
|
||||||
FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
|
FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
|
||||||
{
|
{
|
||||||
const PalEntry &color = GPalette.BaseColors[index];
|
const PalEntry &color = GPalette.BaseColors[index];
|
||||||
|
uint32_t alpha = color.d & 0xff000000;
|
||||||
uint32_t red = color.r;
|
uint32_t red = color.r;
|
||||||
uint32_t green = color.g;
|
uint32_t green = color.g;
|
||||||
uint32_t blue = color.b;
|
uint32_t blue = color.b;
|
||||||
|
@ -367,11 +368,12 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade
|
||||||
green = (green * constants.light_green) / 256;
|
green = (green * constants.light_green) / 256;
|
||||||
blue = (blue * constants.light_blue) / 256;
|
blue = (blue * constants.light_blue) / 256;
|
||||||
}
|
}
|
||||||
return 0xff000000 | (red << 16) | (green << 8) | blue;
|
return alpha | (red << 16) | (green << 8) | blue;
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
|
FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
|
||||||
{
|
{
|
||||||
|
uint32_t alpha = color & 0xff000000;
|
||||||
uint32_t red = (color >> 16) & 0xff;
|
uint32_t red = (color >> 16) & 0xff;
|
||||||
uint32_t green = (color >> 8) & 0xff;
|
uint32_t green = (color >> 8) & 0xff;
|
||||||
uint32_t blue = color & 0xff;
|
uint32_t blue = color & 0xff;
|
||||||
|
@ -400,12 +402,12 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst
|
||||||
green = (green * constants.light_green) / 256;
|
green = (green * constants.light_green) / 256;
|
||||||
blue = (blue * constants.light_blue) / 256;
|
blue = (blue * constants.light_blue) / 256;
|
||||||
}
|
}
|
||||||
return 0xff000000 | (red << 16) | (green << 8) | blue;
|
return alpha | (red << 16) | (green << 8) | blue;
|
||||||
}
|
}
|
||||||
|
|
||||||
FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
||||||
{
|
{
|
||||||
uint32_t fg_alpha = (fg >> 24) & 0xff;
|
uint32_t fg_alpha = fg >> 24;
|
||||||
uint32_t fg_red = (fg >> 16) & 0xff;
|
uint32_t fg_red = (fg >> 16) & 0xff;
|
||||||
uint32_t fg_green = (fg >> 8) & 0xff;
|
uint32_t fg_green = (fg >> 8) & 0xff;
|
||||||
uint32_t fg_blue = fg & 0xff;
|
uint32_t fg_blue = fg & 0xff;
|
||||||
|
@ -468,11 +470,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
||||||
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
|
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
|
||||||
__m256 m255 = _mm256_set1_ps(255.0f); \
|
__m256 m255 = _mm256_set1_ps(255.0f); \
|
||||||
__m256 color = _mm256_set_ps( \
|
__m256 color = _mm256_set_ps( \
|
||||||
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
|
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
|
||||||
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
|
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
|
||||||
__m256 fade = _mm256_set_ps( \
|
__m256 fade = _mm256_set_ps( \
|
||||||
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
|
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
|
||||||
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
|
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
|
||||||
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
|
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
|
||||||
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
|
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
|
||||||
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
|
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
|
||||||
|
@ -488,11 +490,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
||||||
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
|
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
|
||||||
__m256 m255 = _mm256_set1_ps(255.0f); \
|
__m256 m255 = _mm256_set1_ps(255.0f); \
|
||||||
__m256 color = _mm256_set_ps( \
|
__m256 color = _mm256_set_ps( \
|
||||||
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
|
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
|
||||||
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
|
1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
|
||||||
__m256 fade = _mm256_set_ps( \
|
__m256 fade = _mm256_set_ps( \
|
||||||
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
|
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
|
||||||
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
|
0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
|
||||||
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
|
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
|
||||||
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
|
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
|
||||||
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
|
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
|
||||||
|
@ -585,40 +587,31 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
|
||||||
fg = _mm_adds_epu8(fg, bg); \
|
fg = _mm_adds_epu8(fg, bg); \
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
// Calculates the final alpha values to be used when combined with the source texture alpha channel
|
||||||
FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha)
|
FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha)
|
||||||
{
|
{
|
||||||
fg_alpha = src_alpha;
|
uint32_t alpha = fg >> 24;
|
||||||
bg_alpha = dest_alpha;
|
alpha += alpha >> 7;
|
||||||
|
return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \
|
#define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \
|
||||||
__m128i fg_alpha_hi = msrc_alpha; \
|
__m128i msrc_alpha = _mm_set1_epi16(src_alpha); \
|
||||||
__m128i fg_alpha_lo = msrc_alpha; \
|
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
||||||
__m128i bg_alpha_hi = mdest_alpha; \
|
|
||||||
__m128i bg_alpha_lo = mdest_alpha;
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Calculates the final alpha values to be used when combined with the source texture alpha channel
|
// Calculates the final alpha values to be used when combined with the source texture alpha channel
|
||||||
FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha)
|
#define VEC_CALC_BLEND_ALPHA(fg) \
|
||||||
{
|
__m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \
|
||||||
fg_alpha = (fg >> 24) & 0xff;
|
__m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
|
||||||
fg_alpha += fg_alpha >> 7;
|
__m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
|
||||||
bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8;
|
alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
|
||||||
fg_alpha = (src_alpha * fg_alpha) >> 8;
|
alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
|
||||||
|
bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \
|
||||||
|
bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ \
|
||||||
|
fg_alpha_hi = msrc_alpha; \
|
||||||
|
fg_alpha_lo = msrc_alpha; \
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculates the final alpha values to be used when combined with the source texture alpha channel
|
|
||||||
#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \
|
|
||||||
__m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
|
|
||||||
__m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
|
|
||||||
fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \
|
|
||||||
fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \
|
|
||||||
__m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \
|
|
||||||
__m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \
|
|
||||||
fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \
|
|
||||||
fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8);
|
|
||||||
|
|
||||||
// Calculate constants for a simple shade
|
// Calculate constants for a simple shade
|
||||||
#define SSE_SHADE_SIMPLE_INIT(light) \
|
#define SSE_SHADE_SIMPLE_INIT(light) \
|
||||||
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
|
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
|
||||||
|
@ -645,11 +638,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest
|
||||||
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
|
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
|
||||||
__m128i mlight_lo = mlight_hi; \
|
__m128i mlight_lo = mlight_hi; \
|
||||||
__m128i color = _mm_set_epi16( \
|
__m128i color = _mm_set_epi16( \
|
||||||
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
|
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
|
||||||
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
|
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
|
||||||
__m128i fade = _mm_set_epi16( \
|
__m128i fade = _mm_set_epi16( \
|
||||||
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
|
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
|
||||||
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
|
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
|
||||||
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
|
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
|
||||||
__m128i fade_amount_lo = fade_amount_hi; \
|
__m128i fade_amount_lo = fade_amount_hi; \
|
||||||
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
|
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
|
||||||
|
@ -659,11 +652,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest
|
||||||
__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
|
__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
|
||||||
__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
|
__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
|
||||||
__m128i color = _mm_set_epi16( \
|
__m128i color = _mm_set_epi16( \
|
||||||
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
|
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
|
||||||
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
|
256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
|
||||||
__m128i fade = _mm_set_epi16( \
|
__m128i fade = _mm_set_epi16( \
|
||||||
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
|
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
|
||||||
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
|
0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
|
||||||
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
|
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
|
||||||
__m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
|
__m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
|
||||||
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
|
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
|
||||||
|
|
|
@ -554,9 +554,7 @@ public:
|
||||||
if (shade_constants.simple_shade)
|
if (shade_constants.simple_shade)
|
||||||
{
|
{
|
||||||
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -572,7 +570,7 @@ public:
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
|
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE_SIMPLE(fg);
|
VEC_SHADE_SIMPLE(fg);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -593,9 +591,7 @@ public:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -610,7 +606,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE(fg, shade_constants);
|
VEC_SHADE(fg, shade_constants);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -697,9 +693,7 @@ public:
|
||||||
if (shade_constants.simple_shade)
|
if (shade_constants.simple_shade)
|
||||||
{
|
{
|
||||||
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -714,7 +708,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE_SIMPLE(fg);
|
VEC_SHADE_SIMPLE(fg);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -735,9 +729,7 @@ public:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -752,7 +744,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE(fg, shade_constants);
|
VEC_SHADE(fg, shade_constants);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -839,9 +831,7 @@ public:
|
||||||
if (shade_constants.simple_shade)
|
if (shade_constants.simple_shade)
|
||||||
{
|
{
|
||||||
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -856,7 +846,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE_SIMPLE(fg);
|
VEC_SHADE_SIMPLE(fg);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -877,9 +867,7 @@ public:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -894,7 +882,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE(fg, shade_constants);
|
VEC_SHADE(fg, shade_constants);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -981,9 +969,7 @@ public:
|
||||||
if (shade_constants.simple_shade)
|
if (shade_constants.simple_shade)
|
||||||
{
|
{
|
||||||
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -998,7 +984,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE_SIMPLE(fg);
|
VEC_SHADE_SIMPLE(fg);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
@ -1019,9 +1005,7 @@ public:
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
|
||||||
|
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
|
||||||
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
|
|
||||||
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
|
|
||||||
|
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
|
@ -1036,7 +1020,7 @@ public:
|
||||||
local_vplce[3] = local_vplce[3] + local_vince[3];
|
local_vplce[3] = local_vplce[3] + local_vince[3];
|
||||||
|
|
||||||
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
|
||||||
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha);
|
VEC_CALC_BLEND_ALPHA(fg);
|
||||||
VEC_SHADE(fg, shade_constants);
|
VEC_SHADE(fg, shade_constants);
|
||||||
|
|
||||||
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
__m128i bg = _mm_loadu_si128((const __m128i*)dest);
|
||||||
|
|
|
@ -203,7 +203,10 @@ const uint32_t *FTexture::GetPixelsBgra()
|
||||||
PixelsBgra.resize(Width * Height);
|
PixelsBgra.resize(Width * Height);
|
||||||
for (int i = 0; i < Width * Height; i++)
|
for (int i = 0; i < Width * Height; i++)
|
||||||
{
|
{
|
||||||
|
if (indices[i] != 0)
|
||||||
PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d;
|
PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d;
|
||||||
|
else
|
||||||
|
PixelsBgra[i] = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return PixelsBgra.data();
|
return PixelsBgra.data();
|
||||||
|
|
Loading…
Reference in a new issue