diff --git a/src/swrenderer/drawers/r_draw_rgba.cpp b/src/swrenderer/drawers/r_draw_rgba.cpp index 91229d1aa..b02263692 100644 --- a/src/swrenderer/drawers/r_draw_rgba.cpp +++ b/src/swrenderer/drawers/r_draw_rgba.cpp @@ -59,7 +59,7 @@ CVAR(Bool, r_mipmap, true, CVAR_ARCHIVE | CVAR_GLOBALCONFIG); // Level of detail texture bias CVAR(Float, r_lod_bias, -1.5, 0); // To do: add CVAR_ARCHIVE | CVAR_GLOBALCONFIG when a good default has been decided -CVAR(Bool, r_phpdrawers, false, 0); +CVAR(Bool, r_phpdrawers, true, 0); namespace swrenderer { @@ -71,6 +71,46 @@ namespace swrenderer Queue->Push(args); } + void SWTruecolorDrawers::DrawWallMaskedColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallAddColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallAddClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallSubClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + + void SWTruecolorDrawers::DrawWallRevSubClampColumn(const WallDrawerArgs &args) + { + if (r_phpdrawers) + Queue->Push(args); + else + Queue->Push(args); + } + DrawSpanLLVMCommand::DrawSpanLLVMCommand(const SpanDrawerArgs &drawerargs) { auto shade_constants = drawerargs.ColormapConstants(); diff --git a/src/swrenderer/drawers/r_draw_rgba.h b/src/swrenderer/drawers/r_draw_rgba.h index 8e3d43fee..7bff1cffa 100644 --- a/src/swrenderer/drawers/r_draw_rgba.h +++ b/src/swrenderer/drawers/r_draw_rgba.h @@ -360,11 +360,11 @@ namespace swrenderer using SWPixelFormatDrawers::SWPixelFormatDrawers; void DrawWallColumn(const WallDrawerArgs &args) override; - void DrawWallMaskedColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallAddColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallAddClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } - void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override { Queue->Push(args); } + void DrawWallMaskedColumn(const WallDrawerArgs &args) override; + void DrawWallAddColumn(const WallDrawerArgs &args) override; + void DrawWallAddClampColumn(const WallDrawerArgs &args) override; + void DrawWallSubClampColumn(const WallDrawerArgs &args) override; + void DrawWallRevSubClampColumn(const WallDrawerArgs &args) override; void DrawSingleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } void DrawDoubleSkyColumn(const SkyDrawerArgs &args) override { Queue->Push(args); } void DrawColumn(const SpriteDrawerArgs &args) override { Queue->Push(args); } diff --git a/src/swrenderer/drawers/r_draw_wall32.h b/src/swrenderer/drawers/r_draw_wall32.h index f4e117588..a85b96805 100644 --- a/src/swrenderer/drawers/r_draw_wall32.h +++ b/src/swrenderer/drawers/r_draw_wall32.h @@ -71,7 +71,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -120,7 +119,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -141,8 +140,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -179,8 +178,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -198,7 +198,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -213,15 +212,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -241,8 +241,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -260,7 +261,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -281,8 +282,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -294,15 +295,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -359,7 +361,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -415,7 +416,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -436,8 +437,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -481,8 +482,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -500,7 +502,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -515,15 +516,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -550,8 +552,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -569,7 +572,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -590,8 +593,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -603,15 +606,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -675,7 +679,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -746,7 +749,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -767,8 +770,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -827,8 +830,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -846,7 +850,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -861,15 +864,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -911,8 +915,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -930,7 +935,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -951,8 +956,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -964,15 +969,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1051,7 +1057,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1122,7 +1127,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1143,8 +1148,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1203,8 +1208,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1222,7 +1228,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1237,15 +1242,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1287,8 +1293,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1306,7 +1313,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1327,8 +1334,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1340,15 +1347,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1427,7 +1435,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1498,7 +1505,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1519,8 +1526,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1579,8 +1586,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1598,7 +1606,6 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1613,15 +1620,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend @@ -1663,8 +1671,9 @@ namespace swrenderer int light = 256 - (args.Light() >> (FRACBITS - 8)); __m128i mlight = _mm_set_epi16(256, light, light, light, 256, light, light, light); __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -1682,7 +1691,7 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + frac -= one / 2; __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -1703,8 +1712,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -1716,15 +1725,16 @@ namespace swrenderer __m128i fgcolor = _mm_unpacklo_epi8(_mm_cvtsi32_si128(ifgcolor), _mm_setzero_si128()); // Shade - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8); // Blend diff --git a/src/swrenderer/drawers/r_draw_wall32.php b/src/swrenderer/drawers/r_draw_wall32.php index cfac631e1..d51c52455 100644 --- a/src/swrenderer/drawers/r_draw_wall32.php +++ b/src/swrenderer/drawers/r_draw_wall32.php @@ -95,8 +95,9 @@ namespace swrenderer __m128i inv_light = _mm_set_epi16(0, 256 - light, 256 - light, 256 - light, 0, 256 - light, 256 - light, 256 - light); - __m128i inv_desaturate = _mm_setr_epi16(0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 0, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); + __m128i inv_desaturate = _mm_setr_epi16(256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate, 256 - shade_constants.desaturate); __m128i shade_fade = _mm_set_epi16(shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue, shade_constants.fade_alpha, shade_constants.fade_red, shade_constants.fade_green, shade_constants.fade_blue); + shade_fade = _mm_mullo_epi16(shade_fade, inv_light); __m128i shade_light = _mm_set_epi16(shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue, shade_constants.light_alpha, shade_constants.light_red, shade_constants.light_green, shade_constants.light_blue); int desaturate = shade_constants.desaturate; @@ -115,7 +116,10 @@ namespace swrenderer dest = thread->dest_for_thread(dest_y, pitch, dest); fracstep *= thread->num_cores; pitch *= thread->num_cores; - + + frac -= one / 2; + __m128i srcalpha = _mm_set1_epi16(args.SrcAlpha()); __m128i destalpha = _mm_set1_epi16(args.DestAlpha()); @@ -159,8 +163,8 @@ namespace swrenderer unsigned int p11 = source2[y1]; unsigned int inv_b = texturefracx; - unsigned int a = (frac_y1 >> (FRACBITS - 4)) & 15; - unsigned int inv_a = 16 - a; + unsigned int inv_a = (frac_y1 >> (FRACBITS - 4)) & 15; + unsigned int a = 16 - inv_a; unsigned int b = 16 - inv_b; unsigned int sred = (RPART(p00) * (a * b) + RPART(p01) * (inv_a * b) + RPART(p10) * (a * inv_b) + RPART(p11) * (inv_a * inv_b) + 127) >> 8; @@ -181,15 +185,16 @@ namespace swrenderer - int blue = BPART(ifgcolor); - int green = GPART(ifgcolor); - int red = RPART(ifgcolor); + int blue0 = BPART(ifgcolor); + int green0 = GPART(ifgcolor); + int red0 = RPART(ifgcolor); - __m128i intensity = _mm_set1_epi16(((red * 77 + green * 143 + blue * 37) >> 8) * desaturate); + int intensity0 = ((red0 * 77 + green0 * 143 + blue0 * 37) >> 8) * desaturate; + __m128i intensity = _mm_set_epi16(0, intensity0, intensity0, intensity0, 0, intensity0, intensity0, intensity0); fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(fgcolor, inv_desaturate), intensity), 8); - fgcolor = _mm_mullo_epi16(fgcolor, _mm_set1_epi16(light)); - fgcolor = _mm_srli_epi16(_mm_add_epi16(_mm_mullo_epi16(shade_fade, inv_light), fgcolor), 8); + fgcolor = _mm_mullo_epi16(fgcolor, mlight); + fgcolor = _mm_srli_epi16(_mm_add_epi16(shade_fade, fgcolor), 8); fgcolor = _mm_srli_epi16(_mm_mullo_epi16(fgcolor, shade_light), 8);