diff --git a/src/swrenderer/drawers/r_draw_sprite32.h b/src/swrenderer/drawers/r_draw_sprite32.h index f8d596642..1264d5e27 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.h +++ b/src/swrenderer/drawers/r_draw_sprite32.h @@ -931,7 +931,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -948,6 +951,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -975,7 +981,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -992,6 +1001,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1108,7 +1120,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1125,6 +1140,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1172,7 +1190,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1189,6 +1210,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -1676,7 +1700,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1693,6 +1720,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1750,7 +1780,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1767,6 +1800,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -1913,7 +1949,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -1930,6 +1969,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2007,7 +2049,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2024,6 +2069,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2540,7 +2588,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2557,6 +2608,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2614,7 +2668,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2631,6 +2688,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2777,7 +2837,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2794,6 +2857,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -2871,7 +2937,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -2888,6 +2957,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3404,7 +3476,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3421,6 +3496,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3478,7 +3556,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3495,6 +3576,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3641,7 +3725,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3658,6 +3745,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3735,7 +3825,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3752,6 +3845,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -3973,7 +4069,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -3990,6 +4089,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -4016,7 +4118,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4033,6 +4138,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -4285,7 +4393,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4302,6 +4413,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4358,7 +4472,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4375,6 +4492,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4656,7 +4776,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4673,6 +4796,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -4729,7 +4855,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -4746,6 +4875,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5027,7 +5159,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5044,6 +5179,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5100,7 +5238,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5117,6 +5258,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5588,7 +5732,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5605,6 +5752,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -5631,7 +5781,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5648,6 +5801,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend __m128i outcolor = fgcolor; @@ -5902,7 +6058,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5919,6 +6078,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -5975,7 +6137,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -5992,6 +6157,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6275,7 +6443,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6292,6 +6463,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6348,7 +6522,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6365,6 +6542,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6648,7 +6828,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6665,6 +6848,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); @@ -6721,7 +6907,10 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)ifgcolor), _mm_setzero_si128()); // Shade - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -6738,6 +6927,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256)); // Blend uint32_t alpha0 = APART(ifgcolor[0]); diff --git a/src/swrenderer/drawers/r_draw_sprite32.php b/src/swrenderer/drawers/r_draw_sprite32.php index c66bba0c3..85b6e6003 100644 --- a/src/swrenderer/drawers/r_draw_sprite32.php +++ b/src/swrenderer/drawers/r_draw_sprite32.php @@ -278,15 +278,18 @@ namespace swrenderer function Shade($blendVariant, $isSimpleShade) { if ($blendVariant == "copy" || $blendVariant == "shaded") return; -?> - mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); - + mlight = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, mlight), 8); + __m128i lightcontrib = _mm_min_epi16(_mm_add_epi16(mlight, dynlight), _mm_set1_epi16(256)); + lightcontrib = _mm_sub_epi16(lightcontrib, mlight); + lightcontrib = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, lightcontrib), 8); + int blue0 = BPART(ifgcolor[0]); int green0 = GPART(ifgcolor[0]); int red0 = RPART(ifgcolor[0]); @@ -303,6 +306,9 @@ namespace swrenderer fgcolor = _mm_mullo_epi16(fgcolor, mlight); fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); + + fgcolor = _mm_add_epi16(fgcolor, lightcontrib); + fgcolor = _mm_min_epi16(fgcolor, _mm_set1_epi16(256));