Blend mode fixes

This commit is contained in:
Magnus Norddahl 2016-06-20 02:36:54 +02:00
parent d3bc68a160
commit 6daeb5a158
4 changed files with 76 additions and 96 deletions

View file

@ -2538,8 +2538,8 @@ public:
{ {
uint32_t pix = source[frac >> bits]; uint32_t pix = source[frac >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg = shade_bgra(pix, light, shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -2631,8 +2631,8 @@ public:
{ {
uint32_t pix = bufplce[i][local_vplce[i] >> bits]; uint32_t pix = bufplce[i][local_vplce[i] >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg = shade_bgra(pix, light[i], shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -2709,8 +2709,8 @@ public:
{ {
uint32_t pix = source[frac >> bits]; uint32_t pix = source[frac >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg = shade_bgra(pix, light, shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -2802,8 +2802,8 @@ public:
{ {
uint32_t pix = bufplce[i][local_vplce[i] >> bits]; uint32_t pix = bufplce[i][local_vplce[i] >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg = shade_bgra(pix, light[i], shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -2880,8 +2880,8 @@ public:
{ {
uint32_t pix = source[frac >> bits]; uint32_t pix = source[frac >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg = shade_bgra(pix, light, shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -2973,8 +2973,8 @@ public:
{ {
uint32_t pix = bufplce[i][local_vplce[i] >> bits]; uint32_t pix = bufplce[i][local_vplce[i] >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg = shade_bgra(pix, light[i], shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -3051,8 +3051,8 @@ public:
{ {
uint32_t pix = source[frac >> bits]; uint32_t pix = source[frac >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light, shade_constants); uint32_t fg = shade_bgra(pix, light, shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
@ -3144,8 +3144,8 @@ public:
{ {
uint32_t pix = bufplce[i][local_vplce[i] >> bits]; uint32_t pix = bufplce[i][local_vplce[i] >> bits];
uint32_t fg_alpha, bg_alpha; uint32_t fg_alpha = src_alpha;
calc_blend_alpha(pix, src_alpha, dest_alpha, fg_alpha, bg_alpha); uint32_t bg_alpha = calc_blend_bgalpha(pix, dest_alpha);
uint32_t fg = shade_bgra(pix, light[i], shade_constants); uint32_t fg = shade_bgra(pix, light[i], shade_constants);
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;

View file

@ -339,6 +339,7 @@ FORCEINLINE uint32_t shade_bgra_simple(uint32_t color, uint32_t light)
FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants) FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const ShadeConstants &constants)
{ {
const PalEntry &color = GPalette.BaseColors[index]; const PalEntry &color = GPalette.BaseColors[index];
uint32_t alpha = color.d & 0xff000000;
uint32_t red = color.r; uint32_t red = color.r;
uint32_t green = color.g; uint32_t green = color.g;
uint32_t blue = color.b; uint32_t blue = color.b;
@ -367,11 +368,12 @@ FORCEINLINE uint32_t shade_pal_index(uint32_t index, uint32_t light, const Shade
green = (green * constants.light_green) / 256; green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256; blue = (blue * constants.light_blue) / 256;
} }
return 0xff000000 | (red << 16) | (green << 8) | blue; return alpha | (red << 16) | (green << 8) | blue;
} }
FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants) FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConstants &constants)
{ {
uint32_t alpha = color & 0xff000000;
uint32_t red = (color >> 16) & 0xff; uint32_t red = (color >> 16) & 0xff;
uint32_t green = (color >> 8) & 0xff; uint32_t green = (color >> 8) & 0xff;
uint32_t blue = color & 0xff; uint32_t blue = color & 0xff;
@ -400,12 +402,12 @@ FORCEINLINE uint32_t shade_bgra(uint32_t color, uint32_t light, const ShadeConst
green = (green * constants.light_green) / 256; green = (green * constants.light_green) / 256;
blue = (blue * constants.light_blue) / 256; blue = (blue * constants.light_blue) / 256;
} }
return 0xff000000 | (red << 16) | (green << 8) | blue; return alpha | (red << 16) | (green << 8) | blue;
} }
FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg) FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
{ {
uint32_t fg_alpha = (fg >> 24) & 0xff; uint32_t fg_alpha = fg >> 24;
uint32_t fg_red = (fg >> 16) & 0xff; uint32_t fg_red = (fg >> 16) & 0xff;
uint32_t fg_green = (fg >> 8) & 0xff; uint32_t fg_green = (fg >> 8) & 0xff;
uint32_t fg_blue = fg & 0xff; uint32_t fg_blue = fg & 0xff;
@ -468,11 +470,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \ __m256 color = _mm256_set_ps( \
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \ __m256 fade = _mm256_set_ps( \
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
@ -488,11 +490,11 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
__m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \ __m256 mrcp_255 = _mm256_set1_ps(1.0f/255.0f); \
__m256 m255 = _mm256_set1_ps(255.0f); \ __m256 m255 = _mm256_set1_ps(255.0f); \
__m256 color = _mm256_set_ps( \ __m256 color = _mm256_set_ps( \
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \ 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f), \
shade_constants.light_alpha * (1.0f/256.0f), shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \ 1.0f, shade_constants.light_red * (1.0f/256.0f), shade_constants.light_green * (1.0f/256.0f), shade_constants.light_blue * (1.0f/256.0f)); \
__m256 fade = _mm256_set_ps( \ __m256 fade = _mm256_set_ps( \
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \ 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f), \
shade_constants.fade_alpha * (1.0f/256.0f), shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \ 0.0f, shade_constants.fade_red * (1.0f/256.0f), shade_constants.fade_green * (1.0f/256.0f), shade_constants.fade_blue * (1.0f/256.0f)); \
__m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \ __m256 fade_amount_hi = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_hi)); \
__m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \ __m256 fade_amount_lo = _mm256_mul_ps(fade, _mm256_sub_ps(_mm256_set1_ps(1.0f), mlight_lo)); \
__m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \ __m256 inv_desaturate = _mm256_set1_ps((256 - shade_constants.desaturate) * (1.0f/256.0f)); \
@ -585,39 +587,30 @@ FORCEINLINE uint32_t alpha_blend(uint32_t fg, uint32_t bg)
fg = _mm_adds_epu8(fg, bg); \ fg = _mm_adds_epu8(fg, bg); \
} }
/* // Calculates the final alpha values to be used when combined with the source texture alpha channel
FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) FORCEINLINE uint32_t calc_blend_bgalpha(uint32_t fg, uint32_t dest_alpha)
{ {
fg_alpha = src_alpha; uint32_t alpha = fg >> 24;
bg_alpha = dest_alpha; alpha += alpha >> 7;
return 256 - alpha; // (dest_alpha * (256 - alpha)) >> 8;
} }
#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ #define VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha) \
__m128i fg_alpha_hi = msrc_alpha; \ __m128i msrc_alpha = _mm_set1_epi16(src_alpha); \
__m128i fg_alpha_lo = msrc_alpha; \ __m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
__m128i bg_alpha_hi = mdest_alpha; \
__m128i bg_alpha_lo = mdest_alpha;
*/
// Calculates the final alpha values to be used when combined with the source texture alpha channel // Calculates the final alpha values to be used when combined with the source texture alpha channel
FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest_alpha, uint32_t &fg_alpha, uint32_t &bg_alpha) #define VEC_CALC_BLEND_ALPHA(fg) \
{ __m128i fg_alpha_hi, fg_alpha_lo, bg_alpha_hi, bg_alpha_lo; { \
fg_alpha = (fg >> 24) & 0xff; __m128i alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
fg_alpha += fg_alpha >> 7; __m128i alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
bg_alpha = (dest_alpha * (256 - fg_alpha)) >> 8; alpha_hi = _mm_add_epi16(alpha_hi, _mm_srli_epi16(alpha_hi, 7)); \
fg_alpha = (src_alpha * fg_alpha) >> 8; alpha_lo = _mm_add_epi16(alpha_lo, _mm_srli_epi16(alpha_lo, 7)); \
} bg_alpha_hi = _mm_sub_epi16(_mm_set1_epi16(256), alpha_hi); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_hi), mdest_alpha), 8);*/ \
bg_alpha_lo = _mm_sub_epi16(_mm_set1_epi16(256), alpha_lo); /* _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), alpha_lo), mdest_alpha), 8);*/ \
// Calculates the final alpha values to be used when combined with the source texture alpha channel fg_alpha_hi = msrc_alpha; \
#define VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha) \ fg_alpha_lo = msrc_alpha; \
__m128i fg_alpha_hi = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpackhi_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \ }
__m128i fg_alpha_lo = _mm_shufflehi_epi16(_mm_shufflelo_epi16(_mm_unpacklo_epi8(fg, _mm_setzero_si128()), _MM_SHUFFLE(3, 3, 3, 3)), _MM_SHUFFLE(3, 3, 3, 3)); \
fg_alpha_hi = _mm_add_epi16(fg_alpha_hi, _mm_srli_epi16(fg_alpha_hi, 7)); \
fg_alpha_lo = _mm_add_epi16(fg_alpha_lo, _mm_srli_epi16(fg_alpha_lo, 7)); \
__m128i bg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_hi), mdest_alpha), 8); \
__m128i bg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fg_alpha_lo), mdest_alpha), 8); \
fg_alpha_hi = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_hi, msrc_alpha), 8); \
fg_alpha_lo = _mm_srli_epi16(_mm_mullo_epi16(fg_alpha_lo, msrc_alpha), 8);
// Calculate constants for a simple shade // Calculate constants for a simple shade
#define SSE_SHADE_SIMPLE_INIT(light) \ #define SSE_SHADE_SIMPLE_INIT(light) \
@ -645,11 +638,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest
__m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \ __m128i mlight_hi = _mm_set_epi16(256, light, light, light, 256, light, light, light); \
__m128i mlight_lo = mlight_hi; \ __m128i mlight_lo = mlight_hi; \
__m128i color = _mm_set_epi16( \ __m128i color = _mm_set_epi16( \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
__m128i fade = _mm_set_epi16( \ __m128i fade = _mm_set_epi16( \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
__m128i fade_amount_lo = fade_amount_hi; \ __m128i fade_amount_lo = fade_amount_hi; \
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \
@ -659,11 +652,11 @@ FORCEINLINE void calc_blend_alpha(uint32_t fg, uint32_t src_alpha, uint32_t dest
__m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \ __m128i mlight_hi = _mm_set_epi16(256, light1, light1, light1, 256, light0, light0, light0); \
__m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \ __m128i mlight_lo = _mm_set_epi16(256, light3, light3, light3, 256, light2, light2, light2); \
__m128i color = _mm_set_epi16( \ __m128i color = _mm_set_epi16( \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, \
shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \ 256, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); \
__m128i fade = _mm_set_epi16( \ __m128i fade = _mm_set_epi16( \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, \
shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \ 0, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); \
__m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \ __m128i fade_amount_hi = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_hi)); \
__m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \ __m128i fade_amount_lo = _mm_mullo_epi16(fade, _mm_subs_epu16(_mm_set1_epi16(256), mlight_lo)); \
__m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \ __m128i inv_desaturate = _mm_set1_epi16(256 - shade_constants.desaturate); \

View file

@ -554,9 +554,7 @@ public:
if (shade_constants.simple_shade) if (shade_constants.simple_shade)
{ {
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -572,7 +570,7 @@ public:
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE_SIMPLE(fg); VEC_SHADE_SIMPLE(fg);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -593,9 +591,7 @@ public:
else else
{ {
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -610,7 +606,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE(fg, shade_constants); VEC_SHADE(fg, shade_constants);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -697,9 +693,7 @@ public:
if (shade_constants.simple_shade) if (shade_constants.simple_shade)
{ {
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -714,7 +708,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE_SIMPLE(fg); VEC_SHADE_SIMPLE(fg);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -735,9 +729,7 @@ public:
else else
{ {
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -752,7 +744,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE(fg, shade_constants); VEC_SHADE(fg, shade_constants);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -839,9 +831,7 @@ public:
if (shade_constants.simple_shade) if (shade_constants.simple_shade)
{ {
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -856,7 +846,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE_SIMPLE(fg); VEC_SHADE_SIMPLE(fg);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -877,9 +867,7 @@ public:
else else
{ {
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -894,7 +882,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE(fg, shade_constants); VEC_SHADE(fg, shade_constants);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -981,9 +969,7 @@ public:
if (shade_constants.simple_shade) if (shade_constants.simple_shade)
{ {
VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]); VEC_SHADE_SIMPLE_INIT4(light[3], light[2], light[1], light[0]);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -998,7 +984,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE_SIMPLE(fg); VEC_SHADE_SIMPLE(fg);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);
@ -1019,9 +1005,7 @@ public:
else else
{ {
VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants); VEC_SHADE_INIT4(light[3], light[2], light[1], light[0], shade_constants);
VEC_CALC_BLEND_ALPHA_INIT(src_alpha, dest_alpha);
__m128i msrc_alpha = _mm_set1_epi16(src_alpha);
__m128i mdest_alpha = _mm_set1_epi16(dest_alpha);
do do
{ {
@ -1036,7 +1020,7 @@ public:
local_vplce[3] = local_vplce[3] + local_vince[3]; local_vplce[3] = local_vplce[3] + local_vince[3];
__m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0); __m128i fg = _mm_set_epi32(pix3, pix2, pix1, pix0);
VEC_CALC_BLEND_ALPHA(fg, msrc_alpha, mdest_alpha); VEC_CALC_BLEND_ALPHA(fg);
VEC_SHADE(fg, shade_constants); VEC_SHADE(fg, shade_constants);
__m128i bg = _mm_loadu_si128((const __m128i*)dest); __m128i bg = _mm_loadu_si128((const __m128i*)dest);

View file

@ -203,7 +203,10 @@ const uint32_t *FTexture::GetPixelsBgra()
PixelsBgra.resize(Width * Height); PixelsBgra.resize(Width * Height);
for (int i = 0; i < Width * Height; i++) for (int i = 0; i < Width * Height; i++)
{ {
if (indices[i] != 0)
PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d; PixelsBgra[i] = 0xff000000 | GPalette.BaseColors[indices[i]].d;
else
PixelsBgra[i] = 0;
} }
} }
return PixelsBgra.data(); return PixelsBgra.data();